Skip to content

Commit 06b8f6e

Browse files
authored
Fix double slot release after request cancellation (#341)
* Fix double slot release after request cancellation * Simplify cleanup
1 parent 3555572 commit 06b8f6e

File tree

1 file changed

+23
-1
lines changed

1 file changed

+23
-1
lines changed

src/sequence_batch_scheduler/sequence_batch_scheduler.cc

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -883,6 +883,24 @@ SequenceBatchScheduler::ReleaseSequenceSlot(
883883
{
884884
std::unique_lock<std::mutex> lock(mu_);
885885

886+
// If we are releasing the slot for a cancelled sequence,
887+
// we have to clean up the sequence,
888+
// otherwise the reaper will try to clean it up again.
889+
if (!requests->empty() && requests->front()) {
890+
const InferenceRequest::SequenceId& corr_id =
891+
requests->front()->CorrelationId();
892+
LOG_VERBOSE(1) << "Releasing canceled sequence CORRID " << corr_id;
893+
894+
// Clean up the correlation id to sequence slot mapping, to prevent the reaper
895+
// from trying to release the same slot again on this instance of the
896+
// correlation id.
897+
sequence_to_batcherseqslot_map_.erase(corr_id);
898+
// Clean up the correlation id to sequence timeout timestamp mapping, to
899+
// avoid removal of a newer sequence from the backlog upon previous timeout
900+
// if the same id is re-used by the newer sequence.
901+
correlation_id_timestamps_.erase(corr_id);
902+
}
903+
886904
// If there are any remaining requests on the releasing sequence slot, those
887905
// requests will be cancelled.
888906
MarkRequestsCancelled(requests);
@@ -1969,7 +1987,11 @@ OldestSequenceBatch::CompleteAndNext(const uint32_t seq_slot)
19691987
<< model_instance_->Name() << ", slot " << seq_slot;
19701988
release_seq_slot = true;
19711989
} else if (irequest->IsCancelled()) {
1972-
LOG_VERBOSE(1) << "force-end cancelled sequence in batcher "
1990+
const InferenceRequest::SequenceId& correlation_id =
1991+
irequest->CorrelationId();
1992+
LOG_VERBOSE(1) << irequest->LogRequest()
1993+
<< "force-end cancelled sequence CORRID "
1994+
<< correlation_id << " in batcher "
19731995
<< model_instance_->Name() << ", slot " << seq_slot;
19741996
release_seq_slot = true;
19751997
retain_queue_front = true;

0 commit comments

Comments
 (0)