File tree Expand file tree Collapse file tree 2 files changed +12
-2
lines changed Expand file tree Collapse file tree 2 files changed +12
-2
lines changed Original file line number Diff line number Diff line change @@ -72,8 +72,17 @@ RateLimiter::RegisterModelInstance(
72
72
model_context.AddSpecificRequestQueue ();
73
73
74
74
if (!ignore_resources_and_priority_) {
75
+ // As there can be multiple models being loaded concurrently, need
76
+ // to hold a lock to protect the resource counts.
77
+ // Without this serialization, instances of other models might fail
78
+ // to load because of the resource constraints in this instance.
79
+ std::lock_guard<std::mutex> lk (resource_manager_mtx_);
75
80
resource_manager_->AddModelInstance (model_instances.back ().get ());
76
- RETURN_IF_ERROR (resource_manager_->UpdateResourceLimits ());
81
+ const auto & status = resource_manager_->UpdateResourceLimits ();
82
+ if (!status.IsOk ()) {
83
+ resource_manager_->RemoveModelInstance (model_instances.back ().get ());
84
+ return status;
85
+ }
77
86
}
78
87
}
79
88
Original file line number Diff line number Diff line change 1
- // Copyright 2020-2022 , NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ // Copyright 2020-2023 , NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
//
3
3
// Redistribution and use in source and binary forms, with or without
4
4
// modification, are permitted provided that the following conditions
@@ -314,6 +314,7 @@ class RateLimiter {
314
314
315
315
// Manager to keep track of the resource allocations
316
316
std::unique_ptr<ResourceManager> resource_manager_;
317
+ std::mutex resource_manager_mtx_;
317
318
318
319
// Mutex to serialize Payload [de]allocation
319
320
std::mutex payload_mu_;
You can’t perform that action at this time.
0 commit comments