|  | 
| 36 | 36 | #include "backend_model.h" | 
| 37 | 37 | #include "constants.h" | 
| 38 | 38 | #include "filesystem/api.h" | 
|  | 39 | +#include "metrics.h" | 
| 39 | 40 | #include "model.h" | 
| 40 | 41 | #include "model_config_utils.h" | 
| 41 | 42 | #include "repo_agent.h" | 
| @@ -559,11 +560,20 @@ ModelLifeCycle::CreateModel( | 
| 559 | 560 |   // backend. | 
| 560 | 561 |   if (!model_config.backend().empty()) { | 
| 561 | 562 |     std::unique_ptr<TritonModel> model; | 
|  | 563 | +    const uint64_t model_load_ns = | 
|  | 564 | +        std::chrono::duration_cast<std::chrono::nanoseconds>( | 
|  | 565 | +            std::chrono::steady_clock::now().time_since_epoch()) | 
|  | 566 | +            .count(); | 
| 562 | 567 |     status = TritonModel::Create( | 
| 563 | 568 |         server_, model_info->model_path_, options_.backend_cmdline_config_map, | 
| 564 | 569 |         options_.host_policy_map, model_id, version, model_config, | 
| 565 | 570 |         is_config_provided, &model); | 
| 566 | 571 |     is.reset(model.release()); | 
|  | 572 | +    if (status.IsOk()) { | 
|  | 573 | +#ifdef TRITON_ENABLE_METRICS | 
|  | 574 | +      CalculateAndReportLoadTime(model_load_ns, &is); | 
|  | 575 | +#endif  // TRITON_ENABLE_METRICS | 
|  | 576 | +    } | 
| 567 | 577 |   } else { | 
| 568 | 578 | #ifdef TRITON_ENABLE_ENSEMBLE | 
| 569 | 579 |     if (model_info->is_ensemble_) { | 
| @@ -799,10 +809,8 @@ ModelLifeCycle::OnLoadFinal( | 
| 799 | 809 |     // Mark current versions ready and track info in foreground | 
| 800 | 810 |     for (auto& loaded : load_tracker->load_set_) { | 
| 801 | 811 |       std::lock_guard<std::mutex> curr_info_lk(loaded.second->mtx_); | 
| 802 |  | - | 
| 803 | 812 |       loaded.second->state_ = ModelReadyState::READY; | 
| 804 | 813 |       loaded.second->state_reason_.clear(); | 
| 805 |  | - | 
| 806 | 814 |       auto bit = background_models_.find((uintptr_t)loaded.second); | 
| 807 | 815 |       // Check if the version model is loaded in background, if so, | 
| 808 | 816 |       // replace and unload the current serving version | 
| @@ -847,4 +855,35 @@ ModelLifeCycle::OnLoadFinal( | 
| 847 | 855 |   } | 
| 848 | 856 | } | 
| 849 | 857 | 
 | 
|  | 858 | +void | 
|  | 859 | +ModelLifeCycle::CalculateAndReportLoadTime( | 
|  | 860 | +    uint64_t load_start_ns_, std::unique_ptr<Model>* model) | 
|  | 861 | +{ | 
|  | 862 | +#ifdef TRITON_ENABLE_METRICS | 
|  | 863 | +  auto reporter = (*model)->MetricReporter(); | 
|  | 864 | +  const uint64_t now_ns = | 
|  | 865 | +      std::chrono::duration_cast<std::chrono::nanoseconds>( | 
|  | 866 | +          std::chrono::steady_clock::now().time_since_epoch()) | 
|  | 867 | +          .count(); | 
|  | 868 | +  uint64_t time_to_load_ns = now_ns - load_start_ns_; | 
|  | 869 | +  std::chrono::duration<double> time_to_load = | 
|  | 870 | +      std::chrono::duration_cast<std::chrono::duration<double>>( | 
|  | 871 | +          std::chrono::nanoseconds(time_to_load_ns)); | 
|  | 872 | +  ReportModelLoadTime(reporter, time_to_load); | 
|  | 873 | +#endif  // TRITON_ENABLE_METRICS | 
|  | 874 | +} | 
|  | 875 | + | 
|  | 876 | +void | 
|  | 877 | +ModelLifeCycle::ReportModelLoadTime( | 
|  | 878 | +    std::shared_ptr<MetricModelReporter> reporter, | 
|  | 879 | +    const std::chrono::duration<double>& time_to_load) | 
|  | 880 | +{ | 
|  | 881 | +#ifdef TRITON_ENABLE_METRICS | 
|  | 882 | +  if (reporter) { | 
|  | 883 | +    double load_time_in_seconds = time_to_load.count(); | 
|  | 884 | +    reporter->SetGauge(kModelLoadTimeMetric, load_time_in_seconds); | 
|  | 885 | +  } | 
|  | 886 | +#endif  // TRITON_ENABLE_METRICS | 
|  | 887 | +} | 
|  | 888 | + | 
| 850 | 889 | }}  // namespace triton::core | 
0 commit comments