
Commit 5fc928a

Add model namespacing into metrics (#343)
* Add model namespacing into metrics
* Comment on model version number
1 parent b39162c commit 5fc928a

13 files changed: +91, -58 lines
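
The change in one sentence: per-model metrics gain an optional "namespace" label next to the existing "model" and "version" labels, emitted only when model namespacing is enabled. As an illustration (the metric name is from Triton's standard metric set; the values are hypothetical, not taken from this commit), a scrape line would go from

nv_inference_request_success{model="simple",version="1"} 42

to

nv_inference_request_success{model="simple",namespace="ns1",version="1"} 42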

src/backend_model.cc

Lines changed: 10 additions & 7 deletions
@@ -61,8 +61,9 @@ TritonModel::Create(
     InferenceServer* server, const std::string& model_path,
     const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
     const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
-    const int64_t version, inference::ModelConfig model_config,
-    const bool is_config_provided, std::unique_ptr<TritonModel>* model)
+    const ModelIdentifier& model_id, const int64_t version,
+    inference::ModelConfig model_config, const bool is_config_provided,
+    std::unique_ptr<TritonModel>* model)
 {
   model->reset();

@@ -143,8 +144,8 @@ TritonModel::Create(

   // Create and initialize the model.
   std::unique_ptr<TritonModel> local_model(new TritonModel(
-      server, localized_model_dir, backend, min_compute_capability, version,
-      model_config, auto_complete_config, backend_cmdline_config_map,
+      server, localized_model_dir, backend, min_compute_capability, model_id,
+      version, model_config, auto_complete_config, backend_cmdline_config_map,
       host_policy_map));

   TritonModel* raw_local_model = local_model.get();

@@ -929,12 +930,14 @@ TritonModel::TritonModel(
     InferenceServer* server,
     const std::shared_ptr<LocalizedPath>& localized_model_dir,
     const std::shared_ptr<TritonBackend>& backend,
-    const double min_compute_capability, const int64_t version,
-    const inference::ModelConfig& config, const bool auto_complete_config,
+    const double min_compute_capability, const ModelIdentifier& model_id,
+    const int64_t version, const inference::ModelConfig& config,
+    const bool auto_complete_config,
     const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
     const triton::common::HostPolicyCmdlineConfigMap& host_policy_map)
     : Model(
-          min_compute_capability, localized_model_dir->Path(), version, config),
+          min_compute_capability, localized_model_dir->Path(), model_id,
+          version, config),
       server_(server), min_compute_capability_(min_compute_capability),
       auto_complete_config_(auto_complete_config),
       backend_cmdline_config_map_(backend_cmdline_config_map),
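
Every constructor above now threads a ModelIdentifier through to the Model base class. Its definition is not part of this diff; a minimal sketch inferred from the members this commit uses (namespace_, name_, NamespaceDisabled()) could look like the following. This is a hypothetical reconstruction for illustration only; the real struct lives elsewhere in the tree and may differ.

#include <string>

struct ModelIdentifier {
  std::string namespace_;  // empty when the model has no namespace (assumption)
  std::string name_;

  // Assumed semantics: namespacing is "disabled" when no namespace is set,
  // which is what gates the extra metric label added in this commit.
  bool NamespaceDisabled() const { return namespace_.empty(); }
};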

src/backend_model.h

Lines changed: 6 additions & 4 deletions
@@ -61,8 +61,9 @@ class TritonModel : public Model {
       InferenceServer* server, const std::string& model_path,
       const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
       const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
-      const int64_t version, inference::ModelConfig model_config,
-      const bool is_config_provided, std::unique_ptr<TritonModel>* model);
+      const ModelIdentifier& model_id, const int64_t version,
+      inference::ModelConfig model_config, const bool is_config_provided,
+      std::unique_ptr<TritonModel>* model);
   ~TritonModel();

   // Return path to the localized model directory.

@@ -118,8 +119,9 @@ class TritonModel : public Model {
       InferenceServer* server,
       const std::shared_ptr<LocalizedPath>& localized_model_dir,
       const std::shared_ptr<TritonBackend>& backend,
-      const double min_compute_capability, const int64_t version,
-      const inference::ModelConfig& config, const bool auto_complete_config,
+      const double min_compute_capability, const ModelIdentifier& model_id,
+      const int64_t version, const inference::ModelConfig& config,
+      const bool auto_complete_config,
       const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
       const triton::common::HostPolicyCmdlineConfigMap& host_policy_map);

src/backend_model_instance.cc

Lines changed: 1 addition & 1 deletion
@@ -190,7 +190,7 @@ TritonModelInstance::TritonModelInstance(
         model_->ResponseCacheEnabled() &&
         model_->Server()->ResponseCacheEnabled();
     MetricModelReporter::Create(
-        model_->Name(), model_->Version(), id, response_cache_enabled,
+        model_->ModelId(), model_->Version(), id, response_cache_enabled,
         model_->Config().metric_tags(), &reporter_);
   }
 #endif  // TRITON_ENABLE_METRICS

src/constants.h

Lines changed: 2 additions & 1 deletion
@@ -1,4 +1,4 @@
-// Copyright 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -72,6 +72,7 @@ constexpr char kAutoMixedPrecisionExecutionAccelerator[] =

 constexpr char kModelConfigPbTxt[] = "config.pbtxt";

+constexpr char kMetricsLabelModelNamespace[] = "namespace";
 constexpr char kMetricsLabelModelName[] = "model";
 constexpr char kMetricsLabelModelVersion[] = "version";
 constexpr char kMetricsLabelGpuUuid[] = "gpu_uuid";
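
The new kMetricsLabelModelNamespace key joins the existing model, version, and gpu_uuid label keys; as the metric_model_reporter.cc hunks below show, it is attached to a metric's label set only when the model actually carries a namespace.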

src/ensemble_scheduler/ensemble_model.cc

Lines changed: 7 additions & 5 deletions
@@ -1,4 +1,4 @@
-// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -37,19 +37,21 @@ namespace triton { namespace core {

 Status
 EnsembleModel::Create(
-    InferenceServer* server, const std::string& path, const int64_t version,
+    InferenceServer* server, const std::string& path,
+    const ModelIdentifier& model_id, const int64_t version,
     const inference::ModelConfig& model_config, const bool is_config_provided,
     const double min_compute_capability, std::unique_ptr<Model>* model)
 {
   // Create the ensemble model.
-  std::unique_ptr<EnsembleModel> local_model(
-      new EnsembleModel(min_compute_capability, path, version, model_config));
+  std::unique_ptr<EnsembleModel> local_model(new EnsembleModel(
+      min_compute_capability, path, model_id, version, model_config));

   RETURN_IF_ERROR(local_model->Init(is_config_provided));

   std::unique_ptr<Scheduler> scheduler;
   RETURN_IF_ERROR(EnsembleScheduler::Create(
-      local_model->MutableStatsAggregator(), server, model_config, &scheduler));
+      local_model->MutableStatsAggregator(), server, local_model->ModelId(),
+      model_config, &scheduler));
   RETURN_IF_ERROR(local_model->SetScheduler(std::move(scheduler)));

   LOG_VERBOSE(1) << "ensemble model for " << local_model->Name() << std::endl;

src/ensemble_scheduler/ensemble_model.h

Lines changed: 6 additions & 4 deletions
@@ -1,4 +1,4 @@
-// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -39,7 +39,8 @@ class EnsembleModel : public Model {
   EnsembleModel(EnsembleModel&&) = default;

   static Status Create(
-      InferenceServer* server, const std::string& path, const int64_t version,
+      InferenceServer* server, const std::string& path,
+      const ModelIdentifier& model_id, const int64_t version,
       const inference::ModelConfig& model_config, const bool is_config_provided,
       const double min_compute_capability, std::unique_ptr<Model>* model);

@@ -48,8 +49,9 @@ class EnsembleModel : public Model {

   explicit EnsembleModel(
       const double min_compute_capability, const std::string& model_dir,
-      const int64_t version, const inference::ModelConfig& config)
-      : Model(min_compute_capability, model_dir, version, config)
+      const ModelIdentifier& model_id, const int64_t version,
+      const inference::ModelConfig& config)
+      : Model(min_compute_capability, model_dir, model_id, version, config)
   {
   }
   friend std::ostream& operator<<(std::ostream&, const EnsembleModel&);

src/ensemble_scheduler/ensemble_scheduler.cc

Lines changed: 8 additions & 6 deletions
@@ -1,4 +1,4 @@
-// Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -1311,10 +1311,11 @@ EnsembleContext::ScheduleSteps(
 Status
 EnsembleScheduler::Create(
     InferenceStatsAggregator* const stats_aggregator,
-    InferenceServer* const server, const inference::ModelConfig& config,
-    std::unique_ptr<Scheduler>* scheduler)
+    InferenceServer* const server, const ModelIdentifier& model_id,
+    const inference::ModelConfig& config, std::unique_ptr<Scheduler>* scheduler)
 {
-  scheduler->reset(new EnsembleScheduler(stats_aggregator, server, config));
+  scheduler->reset(
+      new EnsembleScheduler(stats_aggregator, server, model_id, config));
   return Status::Success;
 }

@@ -1353,7 +1354,8 @@ EnsembleScheduler::Enqueue(std::unique_ptr<InferenceRequest>& request)

 EnsembleScheduler::EnsembleScheduler(
     InferenceStatsAggregator* const stats_aggregator,
-    InferenceServer* const server, const inference::ModelConfig& config)
+    InferenceServer* const server, const ModelIdentifier& model_id,
+    const inference::ModelConfig& config)
     : stats_aggregator_(stats_aggregator), is_(server), stream_(nullptr),
       inflight_count_(0)
 {

@@ -1371,7 +1373,7 @@ EnsembleScheduler::EnsembleScheduler(
   if (Metrics::Enabled()) {
     // Ensemble scheduler doesn't currently support response cache at top level.
     MetricModelReporter::Create(
-        config.name(), 1, METRIC_REPORTER_ID_CPU,
+        model_id, 1 /* model_version */, METRIC_REPORTER_ID_CPU,
         false /* response_cache_enabled */, config.metric_tags(),
         &metric_reporter_);
   }
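
Note the 1 /* model_version */ argument, the "comment on model version number" from the commit message: the top-level ensemble reporter already passed a hard-coded version of 1 before this change, and the inline comment now makes that explicit.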

src/ensemble_scheduler/ensemble_scheduler.h

Lines changed: 5 additions & 3 deletions
@@ -1,4 +1,4 @@
-// Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -88,7 +88,8 @@ class EnsembleScheduler : public Scheduler {
   // to dispatch requests to models in ensemble internally.
   static Status Create(
       InferenceStatsAggregator* const stats_aggregator,
-      InferenceServer* const server, const inference::ModelConfig& config,
+      InferenceServer* const server, const ModelIdentifier& model_id,
+      const inference::ModelConfig& config,
       std::unique_ptr<Scheduler>* scheduler);

   ~EnsembleScheduler();

@@ -105,7 +106,8 @@ class EnsembleScheduler : public Scheduler {
  private:
   EnsembleScheduler(
       InferenceStatsAggregator* const stats_aggregator,
-      InferenceServer* const server, const inference::ModelConfig& config);
+      InferenceServer* const server, const ModelIdentifier& model_id,
+      const inference::ModelConfig& config);

   std::shared_ptr<MetricModelReporter> metric_reporter_;
   InferenceStatsAggregator* const stats_aggregator_;

src/metric_model_reporter.cc

Lines changed: 12 additions & 8 deletions
@@ -1,4 +1,4 @@
-// Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -103,7 +103,7 @@ MetricReporterConfig::ParseQuantiles(std::string options)
 //
 Status
 MetricModelReporter::Create(
-    const std::string& model_name, const int64_t model_version,
+    const ModelIdentifier& model_id, const int64_t model_version,
     const int device, bool response_cache_enabled,
     const triton::common::MetricTagsMap& model_tags,
     std::shared_ptr<MetricModelReporter>* metric_model_reporter)

@@ -113,7 +113,7 @@ MetricModelReporter::Create(
       reporter_map;

   std::map<std::string, std::string> labels;
-  GetMetricLabels(&labels, model_name, model_version, device, model_tags);
+  GetMetricLabels(&labels, model_id, model_version, device, model_tags);
   auto hash_labels = Metrics::HashLabels(labels);

   std::lock_guard<std::mutex> lock(mtx);

@@ -133,18 +133,18 @@ MetricModelReporter::Create(
   }

   metric_model_reporter->reset(new MetricModelReporter(
-      model_name, model_version, device, response_cache_enabled, model_tags));
+      model_id, model_version, device, response_cache_enabled, model_tags));
   reporter_map.insert({hash_labels, *metric_model_reporter});
   return Status::Success;
 }

 MetricModelReporter::MetricModelReporter(
-    const std::string& model_name, const int64_t model_version,
+    const ModelIdentifier& model_id, const int64_t model_version,
     const int device, bool response_cache_enabled,
     const triton::common::MetricTagsMap& model_tags)
 {
   std::map<std::string, std::string> labels;
-  GetMetricLabels(&labels, model_name, model_version, device, model_tags);
+  GetMetricLabels(&labels, model_id, model_version, device, model_tags);

   // Parse metrics config to control metric setup and behavior
   config_.ParseConfig(response_cache_enabled);

@@ -290,12 +290,16 @@ MetricModelReporter::InitializeSummaries(

 void
 MetricModelReporter::GetMetricLabels(
-    std::map<std::string, std::string>* labels, const std::string& model_name,
+    std::map<std::string, std::string>* labels, const ModelIdentifier& model_id,
     const int64_t model_version, const int device,
     const triton::common::MetricTagsMap& model_tags)
 {
+  if (!model_id.NamespaceDisabled()) {
+    labels->insert(std::map<std::string, std::string>::value_type(
+        std::string(kMetricsLabelModelNamespace), model_id.namespace_));
+  }
   labels->insert(std::map<std::string, std::string>::value_type(
-      std::string(kMetricsLabelModelName), model_name));
+      std::string(kMetricsLabelModelName), model_id.name_));
   labels->insert(std::map<std::string, std::string>::value_type(
       std::string(kMetricsLabelModelVersion), std::to_string(model_version)));
   for (const auto& tag : model_tags) {
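
GetMetricLabels is where namespacing actually reaches the metrics. Below is a self-contained sketch of the same logic, runnable outside Triton, using the label keys from src/constants.h and the hypothetical ModelIdentifier stub from earlier (its NamespaceDisabled() body is an assumption):

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Label keys mirroring the constants added in src/constants.h.
constexpr char kMetricsLabelModelNamespace[] = "namespace";
constexpr char kMetricsLabelModelName[] = "model";
constexpr char kMetricsLabelModelVersion[] = "version";

// Stub for illustration; see the sketch after the backend_model.cc diff.
struct ModelIdentifier {
  std::string namespace_;
  std::string name_;
  bool NamespaceDisabled() const { return namespace_.empty(); }  // assumption
};

// Mirrors the new logic: the namespace label is attached only when
// namespacing is in effect, so label sets (and therefore time series)
// are unchanged for deployments that do not use namespaces.
std::map<std::string, std::string>
GetMetricLabels(const ModelIdentifier& model_id, const int64_t model_version)
{
  std::map<std::string, std::string> labels;
  if (!model_id.NamespaceDisabled()) {
    labels.emplace(kMetricsLabelModelNamespace, model_id.namespace_);
  }
  labels.emplace(kMetricsLabelModelName, model_id.name_);
  labels.emplace(kMetricsLabelModelVersion, std::to_string(model_version));
  return labels;
}

int main()
{
  // Prints "model=simple namespace=ns1 version=1" (std::map orders by key).
  for (const auto& [key, value] : GetMetricLabels({"ns1", "simple"}, 1)) {
    std::cout << key << "=" << value << " ";
  }
  std::cout << "\n";
}

Because the namespace label only appears when a namespace is set, enabling namespacing changes the identity of every per-model time series, which is worth keeping in mind for existing dashboards and alerting queries.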

src/metric_model_reporter.h

Lines changed: 13 additions & 7 deletions
@@ -1,4 +1,4 @@
-// Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -30,11 +30,16 @@

 #ifdef TRITON_ENABLE_METRICS
 #include "metrics.h"
+#include "model.h"
 #include "prometheus/registry.h"
 #endif  // TRITON_ENABLE_METRICS

 namespace triton { namespace core {

+#ifdef TRITON_ENABLE_METRICS
+struct ModelIdentifier;
+#endif  // TRITON_ENABLE_METRICS
+
 //
 // MetricReporterConfig
 //

@@ -70,8 +75,9 @@ class MetricModelReporter {
  public:
 #ifdef TRITON_ENABLE_METRICS
   static Status Create(
-      const std::string& model_name, const int64_t model_version,
-      const int device, bool response_cache_enabled,
+      const triton::core::ModelIdentifier& model_id,
+      const int64_t model_version, const int device,
+      bool response_cache_enabled,
       const triton::common::MetricTagsMap& model_tags,
       std::shared_ptr<MetricModelReporter>* metric_model_reporter);

@@ -90,14 +96,14 @@ class MetricModelReporter {

  private:
   MetricModelReporter(
-      const std::string& model_name, const int64_t model_version,
+      const ModelIdentifier& model_id, const int64_t model_version,
       const int device, bool response_cache_enabled,
       const triton::common::MetricTagsMap& model_tags);

   static void GetMetricLabels(
-      std::map<std::string, std::string>* labels, const std::string& model_name,
-      const int64_t model_version, const int device,
-      const triton::common::MetricTagsMap& model_tags);
+      std::map<std::string, std::string>* labels,
+      const ModelIdentifier& model_id, const int64_t model_version,
+      const int device, const triton::common::MetricTagsMap& model_tags);

   template <typename T, typename... Args>
   T* CreateMetric(
