|
1 | 1 | # SPDX-License-Identifier: Apache-2.0
|
2 | 2 | # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
3 |
| -from typing import TYPE_CHECKING |
| 3 | +from typing import TYPE_CHECKING, Any, Optional |
4 | 4 |
|
5 | 5 | import torch
|
6 | 6 | from lmcache.integration.vllm.vllm_v1_adapter import LMCacheConnectorV1Impl
|
@@ -87,6 +87,22 @@ def wait_for_save(self):
|
87 | 87 | """
|
88 | 88 | self._lmcache_engine.wait_for_save()
|
89 | 89 |
|
| 90 | + def get_finished( |
| 91 | + self, finished_req_ids: set[str] |
| 92 | + ) -> tuple[Optional[set[str]], Optional[set[str]]]: |
| 93 | + """ |
| 94 | + Notify the worker-side connector of the ids of requests that have |
| 95 | + finished generating tokens (delegates to self._lmcache_engine). |
| 96 | +
|
| 97 | + Returns: |
| 98 | + A tuple of (sending/saving ids, recving/loading ids): the ids of |
| 99 | + requests that have finished asynchronous transfer (requests that |
| 100 | + previously returned True from request_finished()). |
| 101 | + The finished saves/sends req ids must belong to a set provided in a |
| 102 | + call to this method (this call or a prior one). |
| 103 | + """ |
| 104 | + return self._lmcache_engine.get_finished(finished_req_ids) |
| 105 | + |
90 | 106 | # ==============================
|
91 | 107 | # Scheduler-side methods
|
92 | 108 | # ==============================
|
@@ -132,3 +148,20 @@ def build_connector_meta(
|
132 | 148 | scheduler_output (SchedulerOutput): the scheduler output object.
|
133 | 149 | """
|
134 | 150 | return self._lmcache_engine.build_connector_meta(scheduler_output)
|
| 151 | + |
| 152 | + def request_finished( |
| 153 | + self, |
| 154 | + request: "Request", |
| 155 | + block_ids: list[int], |
| 156 | + ) -> tuple[bool, Optional[dict[str, Any]]]: |
| 157 | + """ |
| 158 | + Called when a request has finished, before its blocks are freed. |
| 159 | +
|
| 160 | + Returns: |
| 161 | + A two-element tuple, as returned by self._lmcache_engine: |
| 162 | + first, True if the request is being saved/sent asynchronously and |
| 163 | + blocks should not be freed until the request_id is returned from |
| 164 | + get_finished(); second, optional KVTransferParams to be included |
| 165 | + in the request outputs returned by the engine. |
| 166 | + """ |
| 167 | + return self._lmcache_engine.request_finished(request, block_ids) |
0 commit comments