Skip to content

Commit 7e8d97d

Browse files
authored
[BugFix] Honor enable_caching in connector-delayed kvcache load case (#19435)
Signed-off-by: Nick Hill <nhill@redhat.com>
1 parent d70bc7c commit 7e8d97d

File tree

2 files changed

+6
-4
lines changed

2 files changed

+6
-4
lines changed

vllm/v1/core/kv_cache_manager.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -381,10 +381,11 @@ def get_block_ids(self, request_id: str) -> tuple[list[int], ...]:
381 381
self.coordinator.get_blocks(request_id)).get_block_ids()
382 382

383 383
def cache_blocks(self, request: Request, num_computed_tokens: int) -> None:
384-
"""Cache the blocks for the request."""
385-
block_hashes = self.req_to_block_hashes[request.request_id]
386-
self.coordinator.cache_blocks(request, block_hashes,
387-
num_computed_tokens)
384+
"""Cache the blocks for the request, if enabled."""
385+
if self.enable_caching:
386+
block_hashes = self.req_to_block_hashes[request.request_id]
387+
self.coordinator.cache_blocks(request, block_hashes,
388+
num_computed_tokens)
388 389

389 390
def create_empty_block_list(self) -> KVCacheBlocks:
390 391
"""Creates a new KVCacheBlocks instance with no blocks."""

vllm/v1/core/sched/scheduler.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1015,6 +1015,7 @@ def _update_waiting_for_remote_kv(self, request: Request) -> bool:
1015 1015
num_computed_tokens = min(num_computed_tokens, request.num_tokens)
1016 1016
if num_computed_tokens == request.num_tokens:
1017 1017
num_computed_tokens -= 1
1018+
# This will cache the blocks iff caching is enabled.
1018 1019
self.kv_cache_manager.cache_blocks(request, num_computed_tokens)
1019 1020

1020 1021
# Update the request state for scheduling.

0 commit comments

Comments
 (0)