@@ -47,7 +47,7 @@ InferenceStatsAggregator::UpdateFailure(
47
47
48
48
#ifdef TRITON_ENABLE_METRICS
49
49
if (metric_reporter != nullptr ) {
50
- metric_reporter->MetricInferenceFailure (). Increment ( 1 );
50
+ metric_reporter->IncrementCounter ( " inf_failure " , 1 );
51
51
}
52
52
#endif // TRITON_ENABLE_METRICS
53
53
}
@@ -97,18 +97,33 @@ InferenceStatsAggregator::UpdateSuccessWithDuration(
97
97
98
98
#ifdef TRITON_ENABLE_METRICS
99
99
if (metric_reporter != nullptr ) {
100
- metric_reporter->MetricInferenceSuccess ().Increment (1 );
101
- metric_reporter->MetricInferenceCount ().Increment (batch_size);
102
- metric_reporter->MetricInferenceRequestDuration ().Increment (
103
- request_duration_ns / 1000 );
104
- metric_reporter->MetricInferenceQueueDuration ().Increment (
105
- queue_duration_ns / 1000 );
106
- metric_reporter->MetricInferenceComputeInputDuration ().Increment (
107
- compute_input_duration_ns / 1000 );
108
- metric_reporter->MetricInferenceComputeInferDuration ().Increment (
109
- compute_infer_duration_ns / 1000 );
110
- metric_reporter->MetricInferenceComputeOutputDuration ().Increment (
111
- compute_output_duration_ns / 1000 );
100
+ metric_reporter->IncrementCounter (" inf_success" , 1 );
101
+ metric_reporter->IncrementCounter (" inf_count" , batch_size);
102
+ // Counter Latencies
103
+ metric_reporter->IncrementCounter (
104
+ " request_duration" , request_duration_ns / 1000 );
105
+ metric_reporter->IncrementCounter (
106
+ " queue_duration" , queue_duration_ns / 1000 );
107
+ metric_reporter->IncrementCounter (
108
+ " compute_input_duration" , compute_input_duration_ns / 1000 );
109
+ metric_reporter->IncrementCounter (
110
+ " compute_infer_duration" , compute_infer_duration_ns / 1000 );
111
+ metric_reporter->IncrementCounter (
112
+ " compute_output_duration" , compute_output_duration_ns / 1000 );
113
+ // Summary Latencies
114
+ const auto & reporter_config = metric_reporter->Config ();
115
+ // FIXME [DLIS-4762]: request summary is disabled when cache is enabled.
116
+ if (!reporter_config.cache_enabled_ ) {
117
+ metric_reporter->ObserveSummary (
118
+ " request_duration" , request_duration_ns / 1000 );
119
+ }
120
+ metric_reporter->ObserveSummary (" queue_duration" , queue_duration_ns / 1000 );
121
+ metric_reporter->ObserveSummary (
122
+ " compute_input_duration" , compute_input_duration_ns / 1000 );
123
+ metric_reporter->ObserveSummary (
124
+ " compute_infer_duration" , compute_infer_duration_ns / 1000 );
125
+ metric_reporter->ObserveSummary (
126
+ " compute_output_duration" , compute_output_duration_ns / 1000 );
112
127
}
113
128
#endif // TRITON_ENABLE_METRICS
114
129
}
@@ -136,14 +151,23 @@ InferenceStatsAggregator::UpdateSuccessCacheHit(
136
151
137
152
#ifdef TRITON_ENABLE_METRICS
138
153
if (metric_reporter != nullptr ) {
139
- metric_reporter->MetricInferenceSuccess ().Increment (1 );
140
- metric_reporter->MetricInferenceRequestDuration ().Increment (
141
- request_duration_ns / 1000 );
142
- metric_reporter->MetricInferenceQueueDuration ().Increment (
143
- queue_duration_ns / 1000 );
144
- metric_reporter->MetricCacheHitCount ().Increment (1 );
145
- metric_reporter->MetricCacheHitDuration ().Increment (
146
- cache_hit_duration_ns / 1000 );
154
+ // inf_count not recorded on a cache hit
155
+ metric_reporter->IncrementCounter (" inf_success" , 1 );
156
+ // Counter Latencies
157
+ metric_reporter->IncrementCounter (
158
+ " request_duration" , request_duration_ns / 1000 );
159
+ metric_reporter->IncrementCounter (
160
+ " queue_duration" , queue_duration_ns / 1000 );
161
+ metric_reporter->IncrementCounter (" cache_hit_count" , 1 );
162
+ metric_reporter->IncrementCounter (
163
+ " cache_hit_duration" , cache_hit_duration_ns / 1000 );
164
+ // Summary Latencies
165
+ // FIXME [DLIS-4762]: request summary is disabled when cache is enabled.
166
+ // metric_reporter->ObserveSummary(
167
+ // "request_duration", request_duration_ns / 1000);
168
+ metric_reporter->ObserveSummary (" queue_duration" , queue_duration_ns / 1000 );
169
+ metric_reporter->ObserveSummary (
170
+ " cache_hit_duration" , cache_hit_duration_ns / 1000 );
147
171
}
148
172
#endif // TRITON_ENABLE_METRICS
149
173
}
@@ -168,11 +192,19 @@ InferenceStatsAggregator::UpdateSuccessCacheMiss(
168
192
// happens after inference backend sets the request duration, and
169
193
// cache lookup time was already included before the inference backend
170
194
// was called
171
- metric_reporter->MetricInferenceRequestDuration ().Increment (
172
- cache_miss_duration_ns / 1000 );
173
- metric_reporter->MetricCacheMissCount ().Increment (1 );
174
- metric_reporter->MetricCacheMissDuration ().Increment (
175
- cache_miss_duration_ns / 1000 );
195
+ metric_reporter->IncrementCounter (
196
+ " request_duration" , cache_miss_duration_ns / 1000 );
197
+ metric_reporter->IncrementCounter (" cache_miss_count" , 1 );
198
+ metric_reporter->IncrementCounter (
199
+ " cache_miss_duration" , cache_miss_duration_ns / 1000 );
200
+
201
+ // FIXME [DLIS-4762]: request summary is disabled when cache is enabled.
202
+ // Need to account for adding cache miss duration on top of
203
+ // request_duration from backend within a single observation.
204
+ // metric_reporter->ObserveSummary(
205
+ // "request_duration", cache_miss_duration_ns / 1000);
206
+ metric_reporter->ObserveSummary (
207
+ " cache_miss_duration" , cache_miss_duration_ns / 1000 );
176
208
}
177
209
#endif // TRITON_ENABLE_METRICS
178
210
}
@@ -223,7 +255,7 @@ InferenceStatsAggregator::UpdateInferBatchStatsWithDuration(
223
255
224
256
#ifdef TRITON_ENABLE_METRICS
225
257
if (metric_reporter != nullptr ) {
226
- metric_reporter->MetricInferenceExecutionCount (). Increment ( 1 );
258
+ metric_reporter->IncrementCounter ( " inf_exec_count " , 1 );
227
259
}
228
260
#endif // TRITON_ENABLE_METRICS
229
261
}
0 commit comments