|
1 |
| -// Copyright 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
| 1 | +// Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
2 | 2 | //
|
3 | 3 | // Redistribution and use in source and binary forms, with or without
|
4 | 4 | // modification, are permitted provided that the following conditions
|
@@ -67,6 +67,20 @@ FinishSkippedRequests(
|
67 | 67 | }
|
68 | 68 | }
|
69 | 69 |
|
| 70 | +void |
| 71 | +FinishRejectedCancelledRequests( |
| 72 | + std::vector<std::deque<std::unique_ptr<InferenceRequest>>>&& |
| 73 | + rejected_requests, |
| 74 | + std::vector<std::deque<std::unique_ptr<InferenceRequest>>>&& |
| 75 | + cancelled_requests) |
| 76 | +{ |
| 77 | + const static Status rejected_status = |
| 78 | + Status(Status::Code::UNAVAILABLE, "Request timeout expired"); |
| 79 | + const static Status cancelled_status = Status(Status::Code::CANCELLED); |
| 80 | + FinishSkippedRequests(std::move(rejected_requests), rejected_status); |
| 81 | + FinishSkippedRequests(std::move(cancelled_requests), cancelled_status); |
| 82 | +} |
| 83 | + |
70 | 84 | DynamicBatchScheduler::DynamicBatchScheduler(
|
71 | 85 | TritonModel* model, TritonModelInstance* model_instance,
|
72 | 86 | const bool dynamic_batching_enabled, const int32_t max_batch_size,
|
@@ -317,10 +331,6 @@ DynamicBatchScheduler::BatcherThread(const int nice)
|
317 | 331 | }
|
318 | 332 | }
|
319 | 333 |
|
320 |
| - auto wait_for_slots = [this]() { |
321 |
| - return model_->Server()->GetRateLimiter()->PayloadSlotAvailable( |
322 |
| - model_, model_instance_, queue_.SupportPrefetching()); |
323 |
| - }; |
324 | 334 | const uint64_t default_wait_microseconds = 500 * 1000;
|
325 | 335 |
|
326 | 336 | while (!scheduler_thread_exit_.load()) {
|
@@ -359,18 +369,7 @@ DynamicBatchScheduler::BatcherThread(const int nice)
|
359 | 369 | continue;
|
360 | 370 | }
|
361 | 371 |
|
362 |
| - { |
363 |
| - // The wait_for_slots conditional can be blocking till the slots |
364 |
| - // are available for execution. Need to explicitly release the |
365 |
| - // outer lock to allow Enqueue threads above to make progress. |
366 |
| - lock.unlock(); |
367 |
| - // Use slot lock to wait for the slot availability. |
368 |
| - std::mutex slot_mu; |
369 |
| - std::unique_lock<std::mutex> slot_lock(slot_mu); |
370 |
| - cv_.wait(slot_lock, wait_for_slots); |
371 |
| - // Recapture the outer most lock to keep making progress. |
372 |
| - lock.lock(); |
373 |
| - } |
| 372 | + WaitForPayloadSlotAvailable(&lock, default_wait_microseconds); |
374 | 373 |
|
375 | 374 | {
|
376 | 375 | std::lock_guard<std::mutex> exec_lock(
|
@@ -444,17 +443,52 @@ DynamicBatchScheduler::BatcherThread(const int nice)
|
444 | 443 | }
|
445 | 444 |
|
446 | 445 | // Finish rejected and cancelled requests if any
|
447 |
| - const static Status rejected_status = |
448 |
| - Status(Status::Code::UNAVAILABLE, "Request timeout expired"); |
449 |
| - const static Status cancelled_status = Status(Status::Code::CANCELLED); |
450 |
| - FinishSkippedRequests(std::move(rejected_requests), rejected_status); |
451 |
| - FinishSkippedRequests(std::move(cancelled_requests), cancelled_status); |
| 446 | + FinishRejectedCancelledRequests( |
| 447 | + std::move(rejected_requests), std::move(cancelled_requests)); |
452 | 448 | } // end runner loop
|
453 | 449 |
|
454 | 450 | LOG_VERBOSE(1) << "Stopping dynamic-batcher thread for " << model_name_
|
455 | 451 | << "...";
|
456 | 452 | }
|
457 | 453 |
|
| 454 | +void |
| 455 | +DynamicBatchScheduler::WaitForPayloadSlotAvailable( |
| 456 | + std::unique_lock<std::mutex>* lock, uint64_t wait_microseconds) |
| 457 | +{ |
| 458 | + // The wait_for_slots conditional can be blocking till the slots are available |
| 459 | + // for execution. Need to explicitly release the 'mu_' lock to allow the |
| 460 | + // Enqueue threads above to make progress. |
| 461 | + lock->unlock(); |
| 462 | + |
| 463 | + const std::chrono::microseconds wait_timeout(wait_microseconds); |
| 464 | + std::mutex slot_mu; |
| 465 | + std::unique_lock<std::mutex> slot_lock(slot_mu); |
| 466 | + bool slot_available = false; |
| 467 | + |
| 468 | + while (!slot_available) { |
| 469 | + slot_available = cv_.wait_for(slot_lock, wait_timeout, [this]() { |
| 470 | + return model_->Server()->GetRateLimiter()->PayloadSlotAvailable( |
| 471 | + model_, model_instance_, queue_.SupportPrefetching(), |
| 472 | + true /* force_non_blocking */); |
| 473 | + }); |
| 474 | + if (!slot_available) { |
| 475 | + // Reject and release timeout requests from queue. |
| 476 | + std::vector<std::deque<std::unique_ptr<InferenceRequest>>> |
| 477 | + rejected_requests, cancelled_requests; |
| 478 | + { |
| 479 | + std::lock_guard<std::mutex> lock(mu_); |
| 480 | + queue_.RejectTimeoutRequests(); |
| 481 | + queue_.ReleaseSkippedRequests(&rejected_requests, &cancelled_requests); |
| 482 | + } |
| 483 | + FinishRejectedCancelledRequests( |
| 484 | + std::move(rejected_requests), std::move(cancelled_requests)); |
| 485 | + } |
| 486 | + } |
| 487 | + |
| 488 | + // Recapture the lock. |
| 489 | + lock->lock(); |
| 490 | +} |
| 491 | + |
458 | 492 | uint64_t
|
459 | 493 | DynamicBatchScheduler::GetDynamicBatch()
|
460 | 494 | {
|
|
0 commit comments