|
34 | 34 | from ray.serve._private.metrics_utils import InMemoryMetricsStore, MetricsPusher
|
35 | 35 | from ray.serve._private.replica_result import ReplicaResult
|
36 | 36 | from ray.serve._private.replica_scheduler import PendingRequest, ReplicaScheduler
|
37 |
| -from ray.serve._private.utils import generate_request_id, resolve_deployment_response |
| 37 | +from ray.serve._private.utils import ( |
| 38 | + generate_request_id, |
| 39 | + resolve_deployment_response, |
| 40 | + run_coroutine_or_future_threadsafe, |
| 41 | +) |
38 | 42 | from ray.serve.config import AutoscalingConfig
|
39 | 43 | from ray.serve.exceptions import BackPressureError, DeploymentUnavailableError
|
40 | 44 | from ray.util import metrics
|
@@ -695,12 +699,54 @@ def assign_request(
|
695 | 699 | *request_args,
|
696 | 700 | **request_kwargs,
|
697 | 701 | ) -> concurrent.futures.Future[ReplicaResult]:
|
698 |
| - return asyncio.run_coroutine_threadsafe( |
| 702 | + """Schedules assign_request call on the internal asyncio loop. |
| 703 | +
|
| 704 | + This method uses `run_coroutine_or_future_threadsafe` to execute the actual request |
| 705 | + assignment logic (`_asyncio_router.assign_request`) on the dedicated |
| 706 | + asyncio event loop thread. It returns a `concurrent.futures.Future` that |
| 707 | + can be waited on (e.g. via `.result()`) or queried from the calling thread. |
| 708 | +
|
| 709 | + Returns: |
| 710 | + A concurrent.futures.Future resolving to the ReplicaResult representing |
| 711 | + the assigned request. |
| 712 | + """ |
| 713 | + |
| 714 | + def asyncio_future_callback( |
| 715 | + asyncio_future: asyncio.Future, concurrent_future: concurrent.futures.Future |
| 716 | + ): |
| 717 | + """Callback attached to the asyncio Task running assign_request. |
| 718 | +
|
| 719 | + This runs when the asyncio Task finishes (completes, fails, or is cancelled). |
| 720 | + Its primary goal is to propagate cancellation initiated via the |
| 721 | + `concurrent_future` back to the `ReplicaResult` in situations where |
| 722 | + asyncio_future didn't see the cancellation event in time. Think of it |
| 723 | + like a second line of defense for cancellation of replica results. |
| 724 | + """ |
| 725 | + # Check if the cancellation originated from the concurrent.futures.Future |
| 726 | + if ( |
| 727 | + concurrent_future.cancelled() |
| 728 | + and not asyncio_future.cancelled() |
| 729 | + and asyncio_future.exception() is None |
| 730 | + ): |
| 731 | + result: ReplicaResult = asyncio_future.result() |
| 732 | + logger.info( |
| 733 | + "Asyncio task completed despite cancellation attempt. " |
| 734 | + "Attempting to cancel the request that was assigned to a replica." |
| 735 | + ) |
| 736 | + result.cancel() |
| 737 | + |
| 738 | + task = self._asyncio_loop.create_task( |
699 | 739 | self._asyncio_router.assign_request(
|
700 | 740 | request_meta, *request_args, **request_kwargs
|
701 |
| - ), |
| 741 | + ) |
| 742 | + ) |
| 743 | + # Schedule the actual request assignment coroutine on the asyncio loop thread. |
| 744 | + concurrent_future = run_coroutine_or_future_threadsafe( |
| 745 | + task, |
702 | 746 | loop=self._asyncio_loop,
|
703 | 747 | )
|
| 748 | + task.add_done_callback(lambda _: asyncio_future_callback(_, concurrent_future)) |
| 749 | + return concurrent_future |
704 | 750 |
|
705 | 751 | def shutdown(self) -> concurrent.futures.Future:
|
706 | 752 | return asyncio.run_coroutine_threadsafe(
|
|
0 commit comments