@@ -212,47 +212,81 @@ class FreeKVCacheBlockQueue:
212
212
def __init__ (self , blocks : list [KVCacheBlock ]) -> None :
213
213
self .num_free_blocks = len (blocks )
214
214
215
- # Initialize the doubly linked list of free blocks.
216
- self .free_list_head : Optional [KVCacheBlock ] = blocks [0 ]
217
- self .free_list_tail : Optional [KVCacheBlock ] = blocks [- 1 ]
215
+ # Initialize the doubly linked pointers between consecutive blocks.
218
216
for i in range (self .num_free_blocks ):
219
217
if i > 0 :
220
218
blocks [i ].prev_free_block = blocks [i - 1 ]
221
219
if i < self .num_free_blocks - 1 :
222
220
blocks [i ].next_free_block = blocks [i + 1 ]
223
221
222
+ # Create a fake head and a tail block for the doubly linked list to
223
+ # reduce branching in the code
224
+ #
225
+ # The implementation guarantees that the fake head and tail
226
+ # are NEVER popped, so we can safely assume that each real block
227
+ # in the queue has both a prev and a next block.
228
+ self .fake_free_list_head = KVCacheBlock (block_id = - 1 )
229
+ self .fake_free_list_tail = KVCacheBlock (block_id = - 1 )
230
+ if self .num_free_blocks > 0 :
231
+ # Connect fake_head and fake_tail to the first and last block
232
+ # respectively.
233
+ self .fake_free_list_head .next_free_block = blocks [0 ]
234
+ blocks [0 ].prev_free_block = self .fake_free_list_head
235
+ self .fake_free_list_tail .prev_free_block = blocks [- 1 ]
236
+ blocks [- 1 ].next_free_block = self .fake_free_list_tail
237
+ else :
238
+ # For empty list, simply connect the fake head and tail.
239
+ self .fake_free_list_head .next_free_block = self .fake_free_list_tail
240
+ self .fake_free_list_tail .prev_free_block = self .fake_free_list_head
241
+
224
242
def popleft (self ) -> KVCacheBlock :
225
243
"""Pop the first free block and reduce num_free_blocks by 1.
226
244
227
245
Returns:
228
246
The first free block.
229
247
"""
230
- if not self .free_list_head :
248
+ if (self .fake_free_list_head .next_free_block
249
+ is self .fake_free_list_tail
250
+ or self .fake_free_list_head .next_free_block is None ):
251
+ assert self .num_free_blocks == 0 , (
252
+ f"num_free_blocks ({ self .num_free_blocks } ) is out of sync "
253
+ "with the free list." )
231
254
raise ValueError ("No free blocks available" )
232
255
233
- block = self .free_list_head
234
- self .remove (block )
235
- return block
256
+ first_block : KVCacheBlock = self .fake_free_list_head .next_free_block
257
+
258
+ if first_block .next_free_block is None :
259
+ # This should not happen if the block is from the free list.
260
+ # It indicates a bug in the caller's logic.
261
+ raise RuntimeError ("Invalid block found in popleft() "
262
+ "which doesn't have a valid next_free_block" )
263
+
264
+ # Connect fake_head and the next block of first_block (i.e. second block
265
+ # or fake tail).
266
+ self .fake_free_list_head .next_free_block = first_block .next_free_block
267
+ first_block .next_free_block .prev_free_block = self .fake_free_list_head
268
+
269
+ # Remove the block from the linked list.
270
+ first_block .prev_free_block = first_block .next_free_block = None
271
+
272
+ self .num_free_blocks -= 1
273
+ return first_block
236
274
237
275
def remove (self , block : KVCacheBlock ) -> None :
238
276
"""Remove a block in the free list and reduce num_free_blocks by 1.
239
277
240
278
Args:
241
279
block: The block to remove.
242
280
"""
243
- if block .prev_free_block is not None :
244
- # Link the previous block to the next block.
245
- block .prev_free_block .next_free_block = block .next_free_block
246
- if block .next_free_block is not None :
247
- # Link the next block to the previous block.
248
- block .next_free_block .prev_free_block = block .prev_free_block
249
-
250
- if block == self .free_list_head :
251
- # Update the head if the block is the head.
252
- self .free_list_head = block .next_free_block
253
- if block == self .free_list_tail :
254
- # Update the tail if the block is the tail.
255
- self .free_list_tail = block .prev_free_block
281
+ if block .prev_free_block is None or block .next_free_block is None :
282
+ # This should not happen if the block is from the free list.
283
+ # It indicates a bug in the caller's logic.
284
+ raise RuntimeError (f"remove() called on an invalid block: { block } " )
285
+
286
+ # Link the previous block to the next block.
287
+ block .prev_free_block .next_free_block = block .next_free_block
288
+ # Link the next block to the previous block.
289
+ block .next_free_block .prev_free_block = block .prev_free_block
256
290
257
291
# Remove the block from the linked list.
258
292
block .prev_free_block = block .next_free_block = None
@@ -265,17 +299,19 @@ def append(self, block: KVCacheBlock) -> None:
265
299
Args:
266
300
block: The block to append.
267
301
"""
268
- if self .free_list_tail is not None :
269
- # Link the last block to the new block.
270
- self .free_list_tail .next_free_block = block
271
- block .prev_free_block = self .free_list_tail
272
- self .free_list_tail = block
273
- else :
274
- # The free list is empty.
275
- assert self .free_list_head is None
276
- self .free_list_head = self .free_list_tail = block
302
+ if self .fake_free_list_tail .prev_free_block is None :
303
+ raise RuntimeError (
304
+ "prev_free_block of fake_free_list_tail should always exist" )
305
+ last_block : KVCacheBlock = self .fake_free_list_tail .prev_free_block
306
+
307
+ # Connect the new block after the last block.
308
+ last_block .next_free_block = block
309
+ block .prev_free_block = last_block
310
+
311
+ # Connect the fake tail after the new block.
312
+ block .next_free_block = self .fake_free_list_tail
313
+ self .fake_free_list_tail .prev_free_block = block
277
314
278
- block .next_free_block = None
279
315
self .num_free_blocks += 1
280
316
281
317
def get_all_free_blocks (self ) -> list [KVCacheBlock ]:
@@ -285,8 +321,14 @@ def get_all_free_blocks(self) -> list[KVCacheBlock]:
285
321
A list of free blocks.
286
322
"""
287
323
ret = []
288
- curr_block = self .free_list_head
289
- while curr_block is not None :
324
+ if self .fake_free_list_head .next_free_block is None :
325
+ raise RuntimeError (
326
+ "next_free_block of fake_free_list_head should always exist" )
327
+ # Start from the first block
328
+ curr_block : KVCacheBlock = self .fake_free_list_head .next_free_block
329
+ # As long as next_free_block is available, we haven't reached
330
+ # the fake tail yet.
331
+ while curr_block .next_free_block is not None :
290
332
ret .append (curr_block )
291
333
curr_block = curr_block .next_free_block
292
334
return ret
0 commit comments