Skip to content

Commit cda9230

Browse files
authored
[Misc] Update lmcache connector with the latest connector apis (#19441)
Signed-off-by: YaoJiayi <120040070@link.cuhk.edu.cn>
1 parent bf57ccc commit cda9230

File tree

1 file changed

+34
-1
lines changed

1 file changed

+34
-1
lines changed

vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3-
from typing import TYPE_CHECKING
3+
from typing import TYPE_CHECKING, Any, Optional
44

55
import torch
66
from lmcache.integration.vllm.vllm_v1_adapter import LMCacheConnectorV1Impl
@@ -87,6 +87,22 @@ def wait_for_save(self):
8787
"""
8888
self._lmcache_engine.wait_for_save()
8989

90+
def get_finished(
91+
self, finished_req_ids: set[str]
92+
) -> tuple[Optional[set[str]], Optional[set[str]]]:
93+
"""
94+
Notifies the worker-side connector of the ids of requests that have
95+
finished generating tokens.
96+
97+
Returns:
98+
ids of requests that have finished asynchronous transfer
99+
(requests that previously returned True from request_finished()),
100+
tuple of (sending/saving ids, receiving/loading ids).
101+
The finished saves/sends req ids must belong to a set provided in a
102+
call to this method (this call or a prior one).
103+
"""
104+
return self._lmcache_engine.get_finished(finished_req_ids)
105+
90106
# ==============================
91107
# Scheduler-side methods
92108
# ==============================
@@ -132,3 +148,20 @@ def build_connector_meta(
132148
scheduler_output (SchedulerOutput): the scheduler output object.
133149
"""
134150
return self._lmcache_engine.build_connector_meta(scheduler_output)
151+
152+
def request_finished(
153+
self,
154+
request: "Request",
155+
block_ids: list[int],
156+
) -> tuple[bool, Optional[dict[str, Any]]]:
157+
"""
158+
Called when a request has finished, before its blocks are freed.
159+
160+
Returns:
161+
True if the request is being saved/sent asynchronously and blocks
162+
should not be freed until the request_id is returned from
163+
get_finished().
164+
Optional KVTransferParams to be included in the request outputs
165+
returned by the engine.
166+
"""
167+
return self._lmcache_engine.request_finished(request, block_ids)

0 commit comments

Comments
 (0)