1
1
# SPDX-License-Identifier: Apache-2.0
2
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
3
4
+ from abc import ABC , abstractmethod
4
5
from collections import defaultdict
5
6
from dataclasses import dataclass
6
7
from typing import Optional
@@ -64,7 +65,72 @@ def new_empty(self) -> "KVCacheBlocks":
64
65
return KVCacheBlocks (tuple ([] for _ in range (len (self .blocks ))))
65
66
66
67
67
- class DummyKVCacheManager :
68
class KVCacheManagerInterface(ABC):
    """Abstract interface that KV cache manager implementations must satisfy.

    Every member is abstract, so a subclass can only be instantiated once it
    overrides all of them.
    """

    @property
    @abstractmethod
    def usage(self) -> float:
        """Return the KV cache usage as a float.

        Declared as an abstract *property* (not a plain method) so that the
        interface matches implementations that expose ``usage`` as a
        property, e.g. ``DummyKVCacheManager``.
        """
        raise NotImplementedError

    @abstractmethod
    def make_prefix_cache_stats(self) -> Optional[PrefixCacheStats]:
        """Return prefix cache statistics, or ``None`` if unavailable."""
        raise NotImplementedError

    @abstractmethod
    def get_computed_blocks(self,
                            request: Request) -> tuple[KVCacheBlocks, int]:
        """Return the computed blocks for ``request`` and an integer count.

        NOTE(review): the integer is presumably the number of already
        computed tokens -- confirm against the concrete implementations.
        """
        raise NotImplementedError

    @abstractmethod
    def allocate_slots(
        self,
        request: Request,
        num_new_tokens: int,
        num_new_computed_tokens: int = 0,
        new_computed_blocks: Optional[KVCacheBlocks] = None,
        num_draft_tokens: int = 0,
        num_lookahead_tokens: int = 0,
        delay_cache_blocks: bool = False,
    ) -> Optional[KVCacheBlocks]:
        """Allocate KV cache slots for ``request``.

        Returns:
            The allocated blocks, or ``None``.
            NOTE(review): ``None`` presumably signals that allocation was
            not possible -- confirm against the concrete implementations.
        """
        raise NotImplementedError

    @abstractmethod
    def free(self, request: Request) -> None:
        """Free the resources held for ``request``."""
        raise NotImplementedError

    @abstractmethod
    def reset_prefix_cache(self) -> bool:
        """Reset the prefix cache and return a success flag."""
        raise NotImplementedError

    @abstractmethod
    def get_num_common_prefix_blocks(
        self,
        request: Request,
        num_running_requests: int,
    ) -> list[int]:
        """Return the number of common prefix blocks as a list of ints.

        NOTE(review): presumably one entry per KV cache group -- confirm.
        """
        raise NotImplementedError

    @abstractmethod
    def free_block_hashes(self, request: Request) -> None:
        """Discard the block hashes associated with ``request``."""
        raise NotImplementedError

    @abstractmethod
    def take_events(self) -> list[KVCacheEvent]:
        """Return the KV cache events as a list."""
        raise NotImplementedError

    @abstractmethod
    def get_block_ids(self, request_id: str) -> tuple[list[int], ...]:
        """Get the block ids of a request."""
        raise NotImplementedError

    @abstractmethod
    def cache_blocks(self, request: Request, num_computed_tokens: int) -> None:
        """Cache the blocks for the request, if enabled."""
        raise NotImplementedError

    @abstractmethod
    def create_empty_block_list(self) -> KVCacheBlocks:
        """Create a new ``KVCacheBlocks`` instance with no blocks."""
        raise NotImplementedError
131
+
132
+
133
+ class DummyKVCacheManager (KVCacheManagerInterface ):
68
134
69
135
@property
70
136
def usage (self ) -> float :
@@ -88,7 +154,7 @@ def allocate_slots(
88
154
delay_cache_blocks : bool = False ,
89
155
) -> Optional [KVCacheBlocks ]:
90
156
# If we do not return a KV cache block, requests are unschedulable.
91
- return KVCacheBlocks ([KVCacheBlock (block_id = 0 )])
157
+ return KVCacheBlocks (tuple ( [KVCacheBlock (block_id = 0 )]) )
92
158
93
159
def free(self, request: Request) -> None:
    """Do nothing; this implementation has no work to do on free."""
    return None
@@ -109,20 +175,20 @@ def free_block_hashes(self, request: Request) -> None:
109
175
def take_events(self) -> list[KVCacheEvent]:
    """Return an empty list; this implementation produces no events."""
    no_events: list[KVCacheEvent] = []
    return no_events
111
177
112
def get_block_ids(self, request_id: str) -> tuple[list[int], ...]:
    """Get the block ids of a request.

    This implementation ignores ``request_id`` and always returns an
    empty tuple.
    """
    # Empty tuple literal instead of the tuple([]) round trip.
    return ()
115
181
116
182
def cache_blocks(self, request: Request, num_computed_tokens: int) -> None:
    """Cache the blocks for the request, if enabled.

    This implementation performs no caching and simply returns.
    """
    return None
119
185
120
186
def create_empty_block_list(self) -> KVCacheBlocks:
    """Create a new KVCacheBlocks instance with no blocks."""
    # Pass an empty tuple literal directly rather than tuple([]).
    return KVCacheBlocks(())
123
189
124
190
125
- class KVCacheManager :
191
+ class KVCacheManager ( KVCacheManagerInterface ) :
126
192
127
193
def __init__ (
128
194
self ,
0 commit comments