
Commit 5fc928a

Add model namespacing into metrics (#343)
* Add model namespacing into metrics
* Comment on model version number
1 parent b39162c commit 5fc928a

13 files changed: +91, -58 lines
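
The change in one sentence: per-model metrics gain an optional "namespace" label next to the existing "model" and "version" labels, emitted only when model namespacing is enabled. As an illustration (the metric name is from Triton's standard metric set; the values are hypothetical, not taken from this commit), a scrape line would go from

nv_inference_request_success{model="simple",version="1"} 42

to

nv_inference_request_success{model="simple",namespace="ns1",version="1"} 42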

src/backend_model.cc

Lines changed: 10 additions & 7 deletions
@@ -61,8 +61,9 @@ TritonModel::Create(
     InferenceServer* server, const std::string& model_path,
     const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
     const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
-    const int64_t version, inference::ModelConfig model_config,
-    const bool is_config_provided, std::unique_ptr<TritonModel>* model)
+    const ModelIdentifier& model_id, const int64_t version,
+    inference::ModelConfig model_config, const bool is_config_provided,
+    std::unique_ptr<TritonModel>* model)
 {
   model->reset();

@@ -143,8 +144,8 @@ TritonModel::Create(

   // Create and initialize the model.
   std::unique_ptr<TritonModel> local_model(new TritonModel(
-      server, localized_model_dir, backend, min_compute_capability, version,
-      model_config, auto_complete_config, backend_cmdline_config_map,
+      server, localized_model_dir, backend, min_compute_capability, model_id,
+      version, model_config, auto_complete_config, backend_cmdline_config_map,
       host_policy_map));

   TritonModel* raw_local_model = local_model.get();

@@ -929,12 +930,14 @@ TritonModel::TritonModel(
     InferenceServer* server,
     const std::shared_ptr<LocalizedPath>& localized_model_dir,
     const std::shared_ptr<TritonBackend>& backend,
-    const double min_compute_capability, const int64_t version,
-    const inference::ModelConfig& config, const bool auto_complete_config,
+    const double min_compute_capability, const ModelIdentifier& model_id,
+    const int64_t version, const inference::ModelConfig& config,
+    const bool auto_complete_config,
     const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
     const triton::common::HostPolicyCmdlineConfigMap& host_policy_map)
     : Model(
-          min_compute_capability, localized_model_dir->Path(), version, config),
+          min_compute_capability, localized_model_dir->Path(), model_id,
+          version, config),
       server_(server), min_compute_capability_(min_compute_capability),
       auto_complete_config_(auto_complete_config),
       backend_cmdline_config_map_(backend_cmdline_config_map),
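
Every constructor above now threads a ModelIdentifier through to the Model base class. Its definition is not part of this diff; a minimal sketch inferred from the members this commit uses (namespace_, name_, NamespaceDisabled()) could look like the following. This is a hypothetical reconstruction for illustration only; the real struct lives elsewhere in the tree and may differ.

#include <string>

struct ModelIdentifier {
  std::string namespace_;  // empty when the model has no namespace (assumption)
  std::string name_;

  // Assumed semantics: namespacing is "disabled" when no namespace is set,
  // which is what gates the extra metric label added in this commit.
  bool NamespaceDisabled() const { return namespace_.empty(); }
};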

src/backend_model.h

Lines changed: 6 additions & 4 deletions
@@ -61,8 +61,9 @@ class TritonModel : public Model {
       InferenceServer* server, const std::string& model_path,
       const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
       const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
-      const int64_t version, inference::ModelConfig model_config,
-      const bool is_config_provided, std::unique_ptr<TritonModel>* model);
+      const ModelIdentifier& model_id, const int64_t version,
+      inference::ModelConfig model_config, const bool is_config_provided,
+      std::unique_ptr<TritonModel>* model);
   ~TritonModel();

   // Return path to the localized model directory.

@@ -118,8 +119,9 @@ class TritonModel : public Model {
       InferenceServer* server,
       const std::shared_ptr<LocalizedPath>& localized_model_dir,
       const std::shared_ptr<TritonBackend>& backend,
-      const double min_compute_capability, const int64_t version,
-      const inference::ModelConfig& config, const bool auto_complete_config,
+      const double min_compute_capability, const ModelIdentifier& model_id,
+      const int64_t version, const inference::ModelConfig& config,
+      const bool auto_complete_config,
       const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
       const triton::common::HostPolicyCmdlineConfigMap& host_policy_map);

src/backend_model_instance.cc

Lines changed: 1 addition & 1 deletion
@@ -190,7 +190,7 @@ TritonModelInstance::TritonModelInstance(
         model_->ResponseCacheEnabled() &&
         model_->Server()->ResponseCacheEnabled();
     MetricModelReporter::Create(
-        model_->Name(), model_->Version(), id, response_cache_enabled,
+        model_->ModelId(), model_->Version(), id, response_cache_enabled,
         model_->Config().metric_tags(), &reporter_);
   }
 #endif  // TRITON_ENABLE_METRICS

src/constants.h

Lines changed: 2 additions & 1 deletion
@@ -1,4 +1,4 @@
-// Copyright 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -72,6 +72,7 @@ constexpr char kAutoMixedPrecisionExecutionAccelerator[] =

 constexpr char kModelConfigPbTxt[] = "config.pbtxt";

+constexpr char kMetricsLabelModelNamespace[] = "namespace";
 constexpr char kMetricsLabelModelName[] = "model";
 constexpr char kMetricsLabelModelVersion[] = "version";
 constexpr char kMetricsLabelGpuUuid[] = "gpu_uuid";
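
The new kMetricsLabelModelNamespace key joins the existing model, version, and gpu_uuid label keys; as the metric_model_reporter.cc hunks below show, it is attached to a metric's label set only when the model actually carries a namespace.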

src/ensemble_scheduler/ensemble_model.cc

Lines changed: 7 additions & 5 deletions
@@ -1,4 +1,4 @@
-// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -37,19 +37,21 @@ namespace triton { namespace core {

 Status
 EnsembleModel::Create(
-    InferenceServer* server, const std::string& path, const int64_t version,
+    InferenceServer* server, const std::string& path,
+    const ModelIdentifier& model_id, const int64_t version,
     const inference::ModelConfig& model_config, const bool is_config_provided,
     const double min_compute_capability, std::unique_ptr<Model>* model)
 {
   // Create the ensemble model.
-  std::unique_ptr<EnsembleModel> local_model(
-      new EnsembleModel(min_compute_capability, path, version, model_config));
+  std::unique_ptr<EnsembleModel> local_model(new EnsembleModel(
+      min_compute_capability, path, model_id, version, model_config));

   RETURN_IF_ERROR(local_model->Init(is_config_provided));

   std::unique_ptr<Scheduler> scheduler;
   RETURN_IF_ERROR(EnsembleScheduler::Create(
-      local_model->MutableStatsAggregator(), server, model_config, &scheduler));
+      local_model->MutableStatsAggregator(), server, local_model->ModelId(),
+      model_config, &scheduler));
   RETURN_IF_ERROR(local_model->SetScheduler(std::move(scheduler)));

   LOG_VERBOSE(1) << "ensemble model for " << local_model->Name() << std::endl;

src/ensemble_scheduler/ensemble_model.h

Lines changed: 6 additions & 4 deletions
@@ -1,4 +1,4 @@
-// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -39,7 +39,8 @@ class EnsembleModel : public Model {
   EnsembleModel(EnsembleModel&&) = default;

   static Status Create(
-      InferenceServer* server, const std::string& path, const int64_t version,
+      InferenceServer* server, const std::string& path,
+      const ModelIdentifier& model_id, const int64_t version,
       const inference::ModelConfig& model_config, const bool is_config_provided,
       const double min_compute_capability, std::unique_ptr<Model>* model);

@@ -48,8 +49,9 @@ class EnsembleModel : public Model {

   explicit EnsembleModel(
       const double min_compute_capability, const std::string& model_dir,
-      const int64_t version, const inference::ModelConfig& config)
-      : Model(min_compute_capability, model_dir, version, config)
+      const ModelIdentifier& model_id, const int64_t version,
+      const inference::ModelConfig& config)
+      : Model(min_compute_capability, model_dir, model_id, version, config)
   {
   }
   friend std::ostream& operator<<(std::ostream&, const EnsembleModel&);

src/ensemble_scheduler/ensemble_scheduler.cc

Lines changed: 8 additions & 6 deletions
@@ -1,4 +1,4 @@
-// Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -1311,10 +1311,11 @@ EnsembleContext::ScheduleSteps(
 Status
 EnsembleScheduler::Create(
     InferenceStatsAggregator* const stats_aggregator,
-    InferenceServer* const server, const inference::ModelConfig& config,
-    std::unique_ptr<Scheduler>* scheduler)
+    InferenceServer* const server, const ModelIdentifier& model_id,
+    const inference::ModelConfig& config, std::unique_ptr<Scheduler>* scheduler)
 {
-  scheduler->reset(new EnsembleScheduler(stats_aggregator, server, config));
+  scheduler->reset(
+      new EnsembleScheduler(stats_aggregator, server, model_id, config));
   return Status::Success;
 }

@@ -1353,7 +1354,8 @@ EnsembleScheduler::Enqueue(std::unique_ptr<InferenceRequest>& request)

 EnsembleScheduler::EnsembleScheduler(
     InferenceStatsAggregator* const stats_aggregator,
-    InferenceServer* const server, const inference::ModelConfig& config)
+    InferenceServer* const server, const ModelIdentifier& model_id,
+    const inference::ModelConfig& config)
     : stats_aggregator_(stats_aggregator), is_(server), stream_(nullptr),
       inflight_count_(0)
 {

@@ -1371,7 +1373,7 @@ EnsembleScheduler::EnsembleScheduler(
   if (Metrics::Enabled()) {
     // Ensemble scheduler doesn't currently support response cache at top level.
     MetricModelReporter::Create(
-        config.name(), 1, METRIC_REPORTER_ID_CPU,
+        model_id, 1 /* model_version */, METRIC_REPORTER_ID_CPU,
         false /* response_cache_enabled */, config.metric_tags(),
         &metric_reporter_);
   }
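
Note the 1 /* model_version */ argument, the "comment on model version number" from the commit message: the top-level ensemble reporter already passed a hard-coded version of 1 before this change, and the inline comment now makes that explicit.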

src/ensemble_scheduler/ensemble_scheduler.h

Lines changed: 5 additions & 3 deletions
@@ -1,4 +1,4 @@
-// Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -88,7 +88,8 @@ class EnsembleScheduler : public Scheduler {
   // to dispatch requests to models in ensemble internally.
   static Status Create(
       InferenceStatsAggregator* const stats_aggregator,
-      InferenceServer* const server, const inference::ModelConfig& config,
+      InferenceServer* const server, const ModelIdentifier& model_id,
+      const inference::ModelConfig& config,
       std::unique_ptr<Scheduler>* scheduler);

   ~EnsembleScheduler();

@@ -105,7 +106,8 @@ class EnsembleScheduler : public Scheduler {
  private:
   EnsembleScheduler(
       InferenceStatsAggregator* const stats_aggregator,
-      InferenceServer* const server, const inference::ModelConfig& config);
+      InferenceServer* const server, const ModelIdentifier& model_id,
+      const inference::ModelConfig& config);

   std::shared_ptr<MetricModelReporter> metric_reporter_;
   InferenceStatsAggregator* const stats_aggregator_;

src/metric_model_reporter.cc

Lines changed: 12 additions & 8 deletions
@@ -1,4 +1,4 @@
-// Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -103,7 +103,7 @@ MetricReporterConfig::ParseQuantiles(std::string options)
 //
 Status
 MetricModelReporter::Create(
-    const std::string& model_name, const int64_t model_version,
+    const ModelIdentifier& model_id, const int64_t model_version,
     const int device, bool response_cache_enabled,
     const triton::common::MetricTagsMap& model_tags,
     std::shared_ptr<MetricModelReporter>* metric_model_reporter)

@@ -113,7 +113,7 @@ MetricModelReporter::Create(
       reporter_map;

   std::map<std::string, std::string> labels;
-  GetMetricLabels(&labels, model_name, model_version, device, model_tags);
+  GetMetricLabels(&labels, model_id, model_version, device, model_tags);
   auto hash_labels = Metrics::HashLabels(labels);

   std::lock_guard<std::mutex> lock(mtx);

@@ -133,18 +133,18 @@ MetricModelReporter::Create(
   }

   metric_model_reporter->reset(new MetricModelReporter(
-      model_name, model_version, device, response_cache_enabled, model_tags));
+      model_id, model_version, device, response_cache_enabled, model_tags));
   reporter_map.insert({hash_labels, *metric_model_reporter});
   return Status::Success;
 }

 MetricModelReporter::MetricModelReporter(
-    const std::string& model_name, const int64_t model_version,
+    const ModelIdentifier& model_id, const int64_t model_version,
     const int device, bool response_cache_enabled,
     const triton::common::MetricTagsMap& model_tags)
 {
   std::map<std::string, std::string> labels;
-  GetMetricLabels(&labels, model_name, model_version, device, model_tags);
+  GetMetricLabels(&labels, model_id, model_version, device, model_tags);

   // Parse metrics config to control metric setup and behavior
   config_.ParseConfig(response_cache_enabled);

@@ -290,12 +290,16 @@ MetricModelReporter::InitializeSummaries(

 void
 MetricModelReporter::GetMetricLabels(
-    std::map<std::string, std::string>* labels, const std::string& model_name,
+    std::map<std::string, std::string>* labels, const ModelIdentifier& model_id,
     const int64_t model_version, const int device,
     const triton::common::MetricTagsMap& model_tags)
 {
+  if (!model_id.NamespaceDisabled()) {
+    labels->insert(std::map<std::string, std::string>::value_type(
+        std::string(kMetricsLabelModelNamespace), model_id.namespace_));
+  }
   labels->insert(std::map<std::string, std::string>::value_type(
-      std::string(kMetricsLabelModelName), model_name));
+      std::string(kMetricsLabelModelName), model_id.name_));
   labels->insert(std::map<std::string, std::string>::value_type(
       std::string(kMetricsLabelModelVersion), std::to_string(model_version)));
   for (const auto& tag : model_tags) {
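
GetMetricLabels is where namespacing actually reaches the metrics. Below is a self-contained sketch of the same logic, runnable outside Triton, using the label keys from src/constants.h and the hypothetical ModelIdentifier stub from earlier (its NamespaceDisabled() body is an assumption):

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Label keys mirroring the constants added in src/constants.h.
constexpr char kMetricsLabelModelNamespace[] = "namespace";
constexpr char kMetricsLabelModelName[] = "model";
constexpr char kMetricsLabelModelVersion[] = "version";

// Stub for illustration; see the sketch after the backend_model.cc diff.
struct ModelIdentifier {
  std::string namespace_;
  std::string name_;
  bool NamespaceDisabled() const { return namespace_.empty(); }  // assumption
};

// Mirrors the new logic: the namespace label is attached only when
// namespacing is in effect, so label sets (and therefore time series)
// are unchanged for deployments that do not use namespaces.
std::map<std::string, std::string>
GetMetricLabels(const ModelIdentifier& model_id, const int64_t model_version)
{
  std::map<std::string, std::string> labels;
  if (!model_id.NamespaceDisabled()) {
    labels.emplace(kMetricsLabelModelNamespace, model_id.namespace_);
  }
  labels.emplace(kMetricsLabelModelName, model_id.name_);
  labels.emplace(kMetricsLabelModelVersion, std::to_string(model_version));
  return labels;
}

int main()
{
  // Prints "model=simple namespace=ns1 version=1" (std::map orders by key).
  for (const auto& [key, value] : GetMetricLabels({"ns1", "simple"}, 1)) {
    std::cout << key << "=" << value << " ";
  }
  std::cout << "\n";
}

Because the namespace label only appears when a namespace is set, enabling namespacing changes the identity of every per-model time series, which is worth keeping in mind for existing dashboards and alerting queries.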

src/metric_model_reporter.h

Lines changed: 13 additions & 7 deletions
@@ -1,4 +1,4 @@
-// Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -30,11 +30,16 @@

 #ifdef TRITON_ENABLE_METRICS
 #include "metrics.h"
+#include "model.h"
 #include "prometheus/registry.h"
 #endif  // TRITON_ENABLE_METRICS

 namespace triton { namespace core {

+#ifdef TRITON_ENABLE_METRICS
+struct ModelIdentifier;
+#endif  // TRITON_ENABLE_METRICS
+
 //
 // MetricReporterConfig
 //

@@ -70,8 +75,9 @@ class MetricModelReporter {
  public:
 #ifdef TRITON_ENABLE_METRICS
   static Status Create(
-      const std::string& model_name, const int64_t model_version,
-      const int device, bool response_cache_enabled,
+      const triton::core::ModelIdentifier& model_id,
+      const int64_t model_version, const int device,
+      bool response_cache_enabled,
       const triton::common::MetricTagsMap& model_tags,
       std::shared_ptr<MetricModelReporter>* metric_model_reporter);

@@ -90,14 +96,14 @@ class MetricModelReporter {

  private:
   MetricModelReporter(
-      const std::string& model_name, const int64_t model_version,
+      const ModelIdentifier& model_id, const int64_t model_version,
       const int device, bool response_cache_enabled,
       const triton::common::MetricTagsMap& model_tags);

   static void GetMetricLabels(
-      std::map<std::string, std::string>* labels, const std::string& model_name,
-      const int64_t model_version, const int device,
-      const triton::common::MetricTagsMap& model_tags);
+      std::map<std::string, std::string>* labels,
+      const ModelIdentifier& model_id, const int64_t model_version,
+      const int device, const triton::common::MetricTagsMap& model_tags);

   template <typename T, typename... Args>
   T* CreateMetric(
