Skip to content

Commit 1962cce

Browse files
authored
feat: Add model_load_time metric (#397)
Model Load time Metric added
1 parent a6b3e26 commit 1962cce

File tree

7 files changed

+74
-2
lines changed

7 files changed

+74
-2
lines changed

src/constants.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ constexpr char kInitialStateFolder[] = "initial_state";
8585

8686
// Metric names
8787
constexpr char kPendingRequestMetric[] = "inf_pending_request_count";
88+
constexpr char kModelLoadTimeMetric[] = "model_load_time";
8889

8990
constexpr uint64_t NANOS_PER_SECOND = 1000000000;
9091
constexpr uint64_t NANOS_PER_MILLIS = 1000000;

src/metric_model_reporter.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ MetricModelReporter::InitializeGauges(
251251
{
252252
// Always setup these inference request metrics, regardless of config
253253
gauge_families_[kPendingRequestMetric] = &Metrics::FamilyInferenceQueueSize();
254+
gauge_families_[kModelLoadTimeMetric] = &Metrics::FamilyModelLoadTime();
254255

255256
for (auto& iter : gauge_families_) {
256257
const auto& name = iter.first;
@@ -392,6 +393,15 @@ MetricModelReporter::IncrementGauge(const std::string& name, double value)
392393
}
393394
}
394395

396+
void
397+
MetricModelReporter::SetGauge(const std::string& name, double value)
398+
{
399+
auto gauge = GetGauge(name);
400+
if (gauge) {
401+
gauge->Set(value);
402+
}
403+
}
404+
395405
void
396406
MetricModelReporter::DecrementGauge(const std::string& name, double value)
397407
{

src/metric_model_reporter.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ class MetricModelReporter {
8787
const MetricReporterConfig& Config();
8888
// Lookup counter metric by name, and increment it by value if it exists.
8989
void IncrementCounter(const std::string& name, double value);
90+
// Overwrite gauge to value
91+
void SetGauge(const std::string& name, double value);
9092
// Increase gauge by value.
9193
void IncrementGauge(const std::string& name, double value);
9294
// Decrease gauge by value.

src/metrics.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ Metrics::Metrics()
109109
"execution per-model.")
110110
.Register(*registry_)),
111111

112+
model_load_time_family_(prometheus::BuildGauge()
113+
.Name("nv_model_load_duration_secs")
114+
.Help("Model load time in seconds")
115+
.Register(*registry_)),
116+
112117
pinned_memory_pool_total_family_(
113118
prometheus::BuildGauge()
114119
.Name("nv_pinned_memory_pool_total_bytes")

src/metrics.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,12 @@ class Metrics {
215215
return GetSingleton()->inf_pending_request_count_family_;
216216
}
217217

218+
// Metric family of load time per model
219+
static prometheus::Family<prometheus::Gauge>& FamilyModelLoadTime()
220+
{
221+
return GetSingleton()->model_load_time_family_;
222+
}
223+
218224
// Metric families of per-model response cache metrics
219225
// NOTE: These are used in infer_stats for perf_analyzer
220226
static prometheus::Family<prometheus::Counter>& FamilyCacheHitCount()
@@ -300,6 +306,7 @@ class Metrics {
300306
prometheus::Family<prometheus::Counter>&
301307
inf_compute_output_duration_us_family_;
302308
prometheus::Family<prometheus::Gauge>& inf_pending_request_count_family_;
309+
prometheus::Family<prometheus::Gauge>& model_load_time_family_;
303310

304311
prometheus::Family<prometheus::Gauge>& pinned_memory_pool_total_family_;
305312
prometheus::Family<prometheus::Gauge>& pinned_memory_pool_used_family_;

src/model_repository_manager/model_lifecycle.cc

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "backend_model.h"
3737
#include "constants.h"
3838
#include "filesystem/api.h"
39+
#include "metrics.h"
3940
#include "model.h"
4041
#include "model_config_utils.h"
4142
#include "repo_agent.h"
@@ -559,11 +560,20 @@ ModelLifeCycle::CreateModel(
559560
// backend.
560561
if (!model_config.backend().empty()) {
561562
std::unique_ptr<TritonModel> model;
563+
const uint64_t model_load_ns =
564+
std::chrono::duration_cast<std::chrono::nanoseconds>(
565+
std::chrono::steady_clock::now().time_since_epoch())
566+
.count();
562567
status = TritonModel::Create(
563568
server_, model_info->model_path_, options_.backend_cmdline_config_map,
564569
options_.host_policy_map, model_id, version, model_config,
565570
is_config_provided, &model);
566571
is.reset(model.release());
572+
if (status.IsOk()) {
573+
#ifdef TRITON_ENABLE_METRICS
574+
CalculateAndReportLoadTime(model_load_ns, &is);
575+
#endif // TRITON_ENABLE_METRICS
576+
}
567577
} else {
568578
#ifdef TRITON_ENABLE_ENSEMBLE
569579
if (model_info->is_ensemble_) {
@@ -799,10 +809,8 @@ ModelLifeCycle::OnLoadFinal(
799809
// Mark current versions ready and track info in foreground
800810
for (auto& loaded : load_tracker->load_set_) {
801811
std::lock_guard<std::mutex> curr_info_lk(loaded.second->mtx_);
802-
803812
loaded.second->state_ = ModelReadyState::READY;
804813
loaded.second->state_reason_.clear();
805-
806814
auto bit = background_models_.find((uintptr_t)loaded.second);
807815
// Check if the version model is loaded in background, if so,
808816
// replace and unload the current serving version
@@ -847,4 +855,35 @@ ModelLifeCycle::OnLoadFinal(
847855
}
848856
}
849857

858+
void
859+
ModelLifeCycle::CalculateAndReportLoadTime(
860+
uint64_t load_start_ns_, std::unique_ptr<Model>* model)
861+
{
862+
#ifdef TRITON_ENABLE_METRICS
863+
auto reporter = (*model)->MetricReporter();
864+
const uint64_t now_ns =
865+
std::chrono::duration_cast<std::chrono::nanoseconds>(
866+
std::chrono::steady_clock::now().time_since_epoch())
867+
.count();
868+
uint64_t time_to_load_ns = now_ns - load_start_ns_;
869+
std::chrono::duration<double> time_to_load =
870+
std::chrono::duration_cast<std::chrono::duration<double>>(
871+
std::chrono::nanoseconds(time_to_load_ns));
872+
ReportModelLoadTime(reporter, time_to_load);
873+
#endif // TRITON_ENABLE_METRICS
874+
}
875+
876+
void
877+
ModelLifeCycle::ReportModelLoadTime(
878+
std::shared_ptr<MetricModelReporter> reporter,
879+
const std::chrono::duration<double>& time_to_load)
880+
{
881+
#ifdef TRITON_ENABLE_METRICS
882+
if (reporter) {
883+
double load_time_in_seconds = time_to_load.count();
884+
reporter->SetGauge(kModelLoadTimeMetric, load_time_in_seconds);
885+
}
886+
#endif // TRITON_ENABLE_METRICS
887+
}
888+
850889
}} // namespace triton::core

src/model_repository_manager/model_lifecycle.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include <map>
3232
#include <mutex>
3333

34+
#include "backend_model.h"
3435
#include "infer_parameter.h"
3536
#include "model.h"
3637
#include "model_config.pb.h"
@@ -313,6 +314,13 @@ class ModelLifeCycle {
313314
ModelInfo* model_info, const bool is_update,
314315
const std::function<void(Status)>& OnComplete,
315316
std::shared_ptr<LoadTracker> load_tracker);
317+
// Calculate time to load model
318+
void CalculateAndReportLoadTime(
319+
uint64_t load_start_ns_, std::unique_ptr<Model>* model);
320+
// Report Load time per model metrics
321+
void ReportModelLoadTime(
322+
std::shared_ptr<MetricModelReporter> reporter,
323+
const std::chrono::duration<double>& time_to_load);
316324
// Helper function for 'OnLoadComplete()' to finish final operations after
317325
// loading **all** model versions.
318326
void OnLoadFinal(

0 commit comments

Comments
 (0)