From 4a8e39f200a0d882fd27abf95e4597e595690f57 Mon Sep 17 00:00:00 2001
From: Benjamin Chislett
Date: Fri, 21 Mar 2025 12:22:09 -0400
Subject: [PATCH] fix mamba crash due to finished req ids handling

Signed-off-by: Benjamin Chislett
---
 vllm/engine/async_llm_engine.py | 7 +++++--
 vllm/engine/llm_engine.py       | 8 ++++++--
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index 91b9cc62719a..b521cab5ed25 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -303,8 +303,11 @@ async def step_async(
             ctx.seq_group_metadata_list = seq_group_metadata_list
             ctx.scheduler_outputs = scheduler_outputs
 
-            finished_requests_ids = self.scheduler[
-                virtual_engine].get_and_reset_finished_requests_ids()
+            if scheduler_outputs.is_empty():
+                finished_requests_ids = []
+            else:
+                finished_requests_ids = self.scheduler[
+                    virtual_engine].get_and_reset_finished_requests_ids()
 
             # Maybe switch from async mode to sync mode
             if not allow_async_output_proc and len(ctx.output_queue) > 0:
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index b9a8b6a53065..5bce345cbfe5 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -1380,8 +1380,12 @@ def step(self) -> List[Union[RequestOutput, PoolingRequestOutput]]:
             ctx.seq_group_metadata_list = seq_group_metadata_list
             ctx.scheduler_outputs = scheduler_outputs
 
-            finished_requests_ids = self.scheduler[
-                virtual_engine].get_and_reset_finished_requests_ids()
+            if scheduler_outputs.is_empty():
+                finished_requests_ids = []
+            else:
+                finished_requests_ids = self.scheduler[
+                    virtual_engine].get_and_reset_finished_requests_ids()
+
             # When n>1, elements in self.seq_id_to_seq_group should be deleted
             # here, otherwise memory leaks.
             for finished_request_id in finished_requests_ids:
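
Why the guard matters: get_and_reset_finished_requests_ids() is a destructive read, and the ids it returns only take effect if the step actually executes the model. The sketch below is a minimal illustration of that interaction; ToyScheduler, ToyMambaCache, and toy_step are hypothetical stand-ins, not vLLM's real classes, and only get_and_reset_finished_requests_ids() mirrors a name from the diff. Assuming a Mamba-style cache that frees per-request state only when the finished ids reach the model runner, fetching the ids during an empty step drops them before they can be released, which is one plausible reading of the crash named in the subject line.

    # Minimal, self-contained sketch of the failure mode the patch guards
    # against. All names except get_and_reset_finished_requests_ids() are
    # illustrative stand-ins, not vLLM's real API.
    from typing import Dict, List

    class ToyScheduler:
        def __init__(self) -> None:
            self._finished_ids: List[str] = []

        def mark_finished(self, request_id: str) -> None:
            # A request finished; its id waits here until the next step.
            self._finished_ids.append(request_id)

        def get_and_reset_finished_requests_ids(self) -> List[str]:
            # Destructive read: once called, the ids leave the queue.
            ids, self._finished_ids = self._finished_ids, []
            return ids

    class ToyMambaCache:
        def __init__(self) -> None:
            self.slots: Dict[str, int] = {"req-0": 0}

        def release(self, finished_ids: List[str]) -> None:
            # Frees per-request state; stands in for what the model runner
            # does with finished_requests_ids during model execution.
            for rid in finished_ids:
                self.slots.pop(rid, None)

    def toy_step(scheduler: ToyScheduler, cache: ToyMambaCache,
                 step_is_empty: bool) -> None:
        # Before the fix, the ids were fetched-and-reset unconditionally,
        # so an empty step consumed them without ever reaching
        # cache.release(): the slots leaked until the cache broke.
        if step_is_empty:
            finished_ids: List[str] = []  # leave the ids queued for later
        else:
            finished_ids = scheduler.get_and_reset_finished_requests_ids()
            cache.release(finished_ids)

    scheduler = ToyScheduler()
    cache = ToyMambaCache()
    scheduler.mark_finished("req-0")
    toy_step(scheduler, cache, step_is_empty=True)   # ids stay queued
    toy_step(scheduler, cache, step_is_empty=False)  # slot freed here
    assert "req-0" not in cache.slots

In this reading, returning an empty list on empty steps simply defers the destructive read until a step that can actually deliver the ids to the worker, which is all the two hunks above do.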