Skip to content

Commit 34d2650

Browse files
authored
Server stop will wait until all background models are unloaded (#323)
* Unload all models wait until all models complete loading
* [NOT ROOT CAUSE] Revert "Unload all models wait until all models complete loading" This reverts commit 62a4e99.
* Server shutdown must wait for background models
* Show bg model size when printing timeout info
1 parent 6e91ff3 commit 34d2650

File tree

5 files changed

+25
-4
lines changed

5 files changed

+25
-4
lines changed

src/model_repository_manager/model_lifecycle.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,14 @@ ModelLifeCycle::InflightStatus()
259259
return inflight_status;
260260
}
261261

262+
// Returns the current number of entries in background_models_.
// Emits a verbose (level 2) log line on every call.
size_t
263+
ModelLifeCycle::BackgroundModelsSize()
264+
{
265+
LOG_VERBOSE(2) << "BackgroundModelsSize()";
266+
// Hold map_mtx_ for the duration of the size read.
std::lock_guard<std::mutex> map_lock(map_mtx_);
267+
return background_models_.size();
268+
}
269+
262270
const ModelStateMap
263271
ModelLifeCycle::ModelStates()
264272
{

src/model_repository_manager/model_lifecycle.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,9 @@ class ModelLifeCycle {
221221
// that don't have in-flight inferences will not be included.
222222
const std::set<std::tuple<ModelIdentifier, int64_t, size_t>> InflightStatus();
223223

224+
// Return the number of model(s) in the background.
225+
size_t BackgroundModelsSize();
226+
224227
private:
225228
struct ModelInfo {
226229
ModelInfo(

src/model_repository_manager/model_repository_manager.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -981,6 +981,12 @@ ModelRepositoryManager::InflightStatus()
981981
return model_life_cycle_->InflightStatus();
982982
}
983983

984+
// Returns the value reported by model_life_cycle_->BackgroundModelsSize();
// this method only forwards the call.
size_t
985+
ModelRepositoryManager::BackgroundModelsSize()
986+
{
987+
return model_life_cycle_->BackgroundModelsSize();
988+
}
989+
984990
const ModelStateMap
985991
ModelRepositoryManager::LiveModelStates(bool strict_readiness)
986992
{

src/model_repository_manager/model_repository_manager.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,9 @@ class ModelRepositoryManager {
438438
/// if it doesn't have in-flight inferences.
439439
const std::set<std::tuple<ModelIdentifier, int64_t, size_t>> InflightStatus();
440440

441+
/// \return the number of model(s) in the background.
442+
size_t BackgroundModelsSize();
443+
441444
/// \param strict_readiness If true, only models that have at least one
442445
/// ready version will be considered as live. Otherwise, the models that
443446
/// have loading / unloading versions will also be live.

src/server.cc

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -341,10 +341,11 @@ InferenceServer::Stop(const bool force)
341341
}
342342
} else {
343343
const auto& live_models = model_repository_manager_->LiveModelStates();
344+
size_t bg_models_size = model_repository_manager_->BackgroundModelsSize();
345+
size_t num_models = live_models.size() + bg_models_size;
344346

345-
LOG_INFO << "Timeout " << exit_timeout_iters << ": Found "
346-
<< live_models.size() << " live models and "
347-
<< inflight_request_counter_
347+
LOG_INFO << "Timeout " << exit_timeout_iters << ": Found " << num_models
348+
<< " live models and " << inflight_request_counter_
348349
<< " in-flight non-inference requests";
349350
if (LOG_VERBOSE_IS_ON(1)) {
350351
for (const auto& m : live_models) {
@@ -355,7 +356,7 @@ InferenceServer::Stop(const bool force)
355356
}
356357
}
357358

358-
if ((live_models.size() == 0) && (inflight_request_counter_ == 0)) {
359+
if ((num_models == 0) && (inflight_request_counter_ == 0)) {
359360
return Status::Success;
360361
}
361362
}

0 commit comments

Comments
 (0)