Skip to content

Commit b699ace

Browse files
authored
Add --metrics-config and Summary support (#187)
* Add metrics config setter APIs (#181)
* Add support for summaries in latency metrics (#183)
* Hide metrics references within metrics ifdef (#186)
1 parent 615f885 commit b699ace

13 files changed

+555
-163
lines changed

include/triton/core/tritonserver.h

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ struct TRITONSERVER_MetricFamily;
9191
/// }
9292
///
9393
#define TRITONSERVER_API_VERSION_MAJOR 1
94-
#define TRITONSERVER_API_VERSION_MINOR 21
94+
#define TRITONSERVER_API_VERSION_MINOR 22
9595

9696
/// Get the TRITONBACKEND API version supported by the Triton shared
9797
/// library. This value can be compared against the
@@ -1301,7 +1301,8 @@ TRITONSERVER_InferenceRequestSetStringParameter(
13011301
/// \return a TRITONSERVER_Error indicating success or failure.
13021302
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
13031303
TRITONSERVER_InferenceRequestSetIntParameter(
1304-
TRITONSERVER_InferenceRequest* request, const char* key, const int64_t value);
1304+
TRITONSERVER_InferenceRequest* request, const char* key,
1305+
const int64_t value);
13051306

13061307
/// Set a boolean parameter in the request.
13071308
///
@@ -2011,6 +2012,19 @@ TRITONSERVER_ServerOptionsSetHostPolicy(
20112012
TRITONSERVER_ServerOptions* options, const char* policy_name,
20122013
const char* setting, const char* value);
20132014

2015+
/// Set a configuration setting for metrics in server options.
2016+
///
2017+
/// \param options The server options object.
2018+
/// \param name The name of the configuration group. An empty string indicates
2019+
/// a global configuration option.
2020+
/// \param setting The name of the setting.
2021+
/// \param value The setting value.
2022+
/// \return a TRITONSERVER_Error indicating success or failure.
2023+
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
2024+
TRITONSERVER_ServerOptionsSetMetricsConfig(
2025+
TRITONSERVER_ServerOptions* options, const char* name, const char* setting,
2026+
const char* value);
2027+
20142028
/// TRITONSERVER_Server
20152029
///
20162030
/// An inference server.

src/backend_model_instance.cc

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,9 +157,14 @@ TritonModelInstance::TritonModelInstance(
157157
const int id = (kind_ == TRITONSERVER_INSTANCEGROUPKIND_GPU)
158158
? device_id_
159159
: METRIC_REPORTER_ID_CPU;
160+
// Let every metric reporter know if caching is enabled to correctly include
161+
// cache miss time into request duration on cache misses.
162+
const bool response_cache_enabled =
163+
model_->Config().response_cache().enable() &&
164+
model_->Server()->ResponseCacheEnabled();
160165
MetricModelReporter::Create(
161-
model_->Name(), model_->Version(), id, model_->Config().metric_tags(),
162-
&reporter_);
166+
model_->Name(), model_->Version(), id, response_cache_enabled,
167+
model_->Config().metric_tags(), &reporter_);
163168
}
164169
#endif // TRITON_ENABLE_METRICS
165170
}

src/dynamic_batch_scheduler.cc

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,15 +80,13 @@ DynamicBatchScheduler::DynamicBatchScheduler(
8080
// Both the server and model config should specify
8181
// caching enabled for model to utilize response cache.
8282
response_cache_enabled_ =
83-
(response_cache_enable && model_->Server()->ResponseCacheEnabled() &&
84-
model_->Server()->CacheManager() &&
85-
model_->Server()->CacheManager()->Cache());
83+
response_cache_enable && model_->Server()->ResponseCacheEnabled();
8684
#ifdef TRITON_ENABLE_METRICS
8785
// Initialize metric reporter for cache statistics if cache enabled
8886
if (response_cache_enabled_) {
8987
MetricModelReporter::Create(
9088
model_name_, model_->Version(), METRIC_REPORTER_ID_RESPONSE_CACHE,
91-
model_->Config().metric_tags(), &reporter_);
89+
response_cache_enabled_, model_->Config().metric_tags(), &reporter_);
9290
}
9391
#endif // TRITON_ENABLE_METRICS
9492
max_preferred_batch_size_ = 0;

src/ensemble_scheduler.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1316,8 +1316,11 @@ EnsembleScheduler::EnsembleScheduler(
13161316

13171317
#ifdef TRITON_ENABLE_METRICS
13181318
if (Metrics::Enabled()) {
1319+
// Ensemble scheduler doesn't currently support response cache at top level.
13191320
MetricModelReporter::Create(
1320-
config.name(), 1, METRIC_REPORTER_ID_CPU, config.metric_tags(),
1321+
config.name(), 1, METRIC_REPORTER_ID_CPU,
1322+
false /* response_cache_enabled */,
1323+
config.metric_tags(),
13211324
&metric_reporter_);
13221325
}
13231326
#endif // TRITON_ENABLE_METRICS

src/infer_stats.cc

Lines changed: 59 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ InferenceStatsAggregator::UpdateFailure(
4747

4848
#ifdef TRITON_ENABLE_METRICS
4949
if (metric_reporter != nullptr) {
50-
metric_reporter->MetricInferenceFailure().Increment(1);
50+
metric_reporter->IncrementCounter("inf_failure", 1);
5151
}
5252
#endif // TRITON_ENABLE_METRICS
5353
}
@@ -97,18 +97,33 @@ InferenceStatsAggregator::UpdateSuccessWithDuration(
9797

9898
#ifdef TRITON_ENABLE_METRICS
9999
if (metric_reporter != nullptr) {
100-
metric_reporter->MetricInferenceSuccess().Increment(1);
101-
metric_reporter->MetricInferenceCount().Increment(batch_size);
102-
metric_reporter->MetricInferenceRequestDuration().Increment(
103-
request_duration_ns / 1000);
104-
metric_reporter->MetricInferenceQueueDuration().Increment(
105-
queue_duration_ns / 1000);
106-
metric_reporter->MetricInferenceComputeInputDuration().Increment(
107-
compute_input_duration_ns / 1000);
108-
metric_reporter->MetricInferenceComputeInferDuration().Increment(
109-
compute_infer_duration_ns / 1000);
110-
metric_reporter->MetricInferenceComputeOutputDuration().Increment(
111-
compute_output_duration_ns / 1000);
100+
metric_reporter->IncrementCounter("inf_success", 1);
101+
metric_reporter->IncrementCounter("inf_count", batch_size);
102+
// Counter Latencies
103+
metric_reporter->IncrementCounter(
104+
"request_duration", request_duration_ns / 1000);
105+
metric_reporter->IncrementCounter(
106+
"queue_duration", queue_duration_ns / 1000);
107+
metric_reporter->IncrementCounter(
108+
"compute_input_duration", compute_input_duration_ns / 1000);
109+
metric_reporter->IncrementCounter(
110+
"compute_infer_duration", compute_infer_duration_ns / 1000);
111+
metric_reporter->IncrementCounter(
112+
"compute_output_duration", compute_output_duration_ns / 1000);
113+
// Summary Latencies
114+
const auto& reporter_config = metric_reporter->Config();
115+
// FIXME [DLIS-4762]: request summary is disabled when cache is enabled.
116+
if (!reporter_config.cache_enabled_) {
117+
metric_reporter->ObserveSummary(
118+
"request_duration", request_duration_ns / 1000);
119+
}
120+
metric_reporter->ObserveSummary("queue_duration", queue_duration_ns / 1000);
121+
metric_reporter->ObserveSummary(
122+
"compute_input_duration", compute_input_duration_ns / 1000);
123+
metric_reporter->ObserveSummary(
124+
"compute_infer_duration", compute_infer_duration_ns / 1000);
125+
metric_reporter->ObserveSummary(
126+
"compute_output_duration", compute_output_duration_ns / 1000);
112127
}
113128
#endif // TRITON_ENABLE_METRICS
114129
}
@@ -136,14 +151,23 @@ InferenceStatsAggregator::UpdateSuccessCacheHit(
136151

137152
#ifdef TRITON_ENABLE_METRICS
138153
if (metric_reporter != nullptr) {
139-
metric_reporter->MetricInferenceSuccess().Increment(1);
140-
metric_reporter->MetricInferenceRequestDuration().Increment(
141-
request_duration_ns / 1000);
142-
metric_reporter->MetricInferenceQueueDuration().Increment(
143-
queue_duration_ns / 1000);
144-
metric_reporter->MetricCacheHitCount().Increment(1);
145-
metric_reporter->MetricCacheHitDuration().Increment(
146-
cache_hit_duration_ns / 1000);
154+
// inf_count not recorded on a cache hit
155+
metric_reporter->IncrementCounter("inf_success", 1);
156+
// Counter Latencies
157+
metric_reporter->IncrementCounter(
158+
"request_duration", request_duration_ns / 1000);
159+
metric_reporter->IncrementCounter(
160+
"queue_duration", queue_duration_ns / 1000);
161+
metric_reporter->IncrementCounter("cache_hit_count", 1);
162+
metric_reporter->IncrementCounter(
163+
"cache_hit_duration", cache_hit_duration_ns / 1000);
164+
// Summary Latencies
165+
// FIXME [DLIS-4762]: request summary is disabled when cache is enabled.
166+
// metric_reporter->ObserveSummary(
167+
// "request_duration", request_duration_ns / 1000);
168+
metric_reporter->ObserveSummary("queue_duration", queue_duration_ns / 1000);
169+
metric_reporter->ObserveSummary(
170+
"cache_hit_duration", cache_hit_duration_ns / 1000);
147171
}
148172
#endif // TRITON_ENABLE_METRICS
149173
}
@@ -168,11 +192,19 @@ InferenceStatsAggregator::UpdateSuccessCacheMiss(
168192
// happens after inference backend sets the request duration, and
169193
// cache lookup time was already included before the inference backend
170194
// was called
171-
metric_reporter->MetricInferenceRequestDuration().Increment(
172-
cache_miss_duration_ns / 1000);
173-
metric_reporter->MetricCacheMissCount().Increment(1);
174-
metric_reporter->MetricCacheMissDuration().Increment(
175-
cache_miss_duration_ns / 1000);
195+
metric_reporter->IncrementCounter(
196+
"request_duration", cache_miss_duration_ns / 1000);
197+
metric_reporter->IncrementCounter("cache_miss_count", 1);
198+
metric_reporter->IncrementCounter(
199+
"cache_miss_duration", cache_miss_duration_ns / 1000);
200+
201+
// FIXME [DLIS-4762]: request summary is disabled when cache is enabled.
202+
// Need to account for adding cache miss duration on top of
203+
// request_duration from backend within a single observation.
204+
// metric_reporter->ObserveSummary(
205+
// "request_duration", cache_miss_duration_ns / 1000);
206+
metric_reporter->ObserveSummary(
207+
"cache_miss_duration", cache_miss_duration_ns / 1000);
176208
}
177209
#endif // TRITON_ENABLE_METRICS
178210
}
@@ -223,7 +255,7 @@ InferenceStatsAggregator::UpdateInferBatchStatsWithDuration(
223255

224256
#ifdef TRITON_ENABLE_METRICS
225257
if (metric_reporter != nullptr) {
226-
metric_reporter->MetricInferenceExecutionCount().Increment(1);
258+
metric_reporter->IncrementCounter("inf_exec_count", 1);
227259
}
228260
#endif // TRITON_ENABLE_METRICS
229261
}

0 commit comments

Comments
 (0)