diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index a9600a2c8aa3..fc0f26cf9785 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -1339,8 +1339,12 @@ def step(self) -> List[Union[RequestOutput, PoolingRequestOutput]]: ctx.seq_group_metadata_list = seq_group_metadata_list ctx.scheduler_outputs = scheduler_outputs - finished_requests_ids = self.scheduler[ - virtual_engine].get_and_reset_finished_requests_ids() + if scheduler_outputs.is_empty(): + finished_requests_ids = [] + else: + finished_requests_ids = self.scheduler[ + virtual_engine].get_and_reset_finished_requests_ids() + # When n>1, elements in self.seq_id_to_seq_group should be deleted # here, otherwise memory leaks. for finished_request_id in finished_requests_ids: