diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py index e93143c83d9f..c08978876607 100644 --- a/vllm/core/scheduler.py +++ b/vllm/core/scheduler.py @@ -736,11 +736,6 @@ def _schedule_running( # NOTE(woosuk): Preemption happens only when there is no available # slot to keep all the sequence groups in the RUNNING state. while not self._can_append_slots(seq_group, enable_chunking): - budget.subtract_num_batched_tokens(seq_group.request_id, - num_running_tokens) - num_running_seqs = seq_group.get_max_num_running_seqs() - budget.subtract_num_seqs(seq_group.request_id, - num_running_seqs) if (curr_loras is not None and seq_group.lora_int_id > 0 and seq_group.lora_int_id in curr_loras):