@@ -106,86 +106,81 @@ InferenceRequest::InferenceRequest(
     : needs_normalization_(true), model_raw_(model),
       requested_model_version_(requested_model_version), flags_(0),
       correlation_id_(0), batch_size_(0), timeout_us_(0), collect_stats_(true),
-      state_(InferenceRequest::State::INITIALIZED), null_request_(false),
-      decrement_pending_count_(false)
+      state_(InferenceRequest::State::INITIALIZED), null_request_(false)
 {
   SetPriority(0);
 }
 
-InferenceRequest::~InferenceRequest()
-{
-  // If request has been enqueued but hasn't started executing by destruction
-  // time, an error occurred and the pending request count will need to be
-  // decremented.
-  DecrementPendingRequestCount();
-}
-
-
 Status
 InferenceRequest::SetState(InferenceRequest::State new_state)
 {
+  LOG_VERBOSE(1) << LogRequest() << "Setting state from " << state_ << " to "
+                 << new_state;
   // No-op if this is already the current state, or if this is a null request.
   if (new_state == state_ || null_request_) {
     return Status::Success;
   }
 
-  // Allow RELEASED state transition from any state for now.
-  // Not all requests will follow linear transition, such as null requests
-  // used for padding batches, and ensemble requests.
-  if (new_state == InferenceRequest::State::RELEASED) {
-    state_ = new_state;
-    return Status::Success;
-  }
-
   // Generate error when called rather than copying it into every case below.
   const auto generate_error = [&]() {
     std::stringstream ss;
     ss << LogRequest() << "Invalid request state transition from " << state_
        << " to " << new_state;
-    return Status(Status::Code::INVALID_ARG, ss.str());
+    return Status(Status::Code::INTERNAL, ss.str());
  };
 
   // Define state transitions
   switch (state_) {
     case InferenceRequest::State::INITIALIZED: {
-      if (new_state != InferenceRequest::State::STARTED) {
+      if (new_state == InferenceRequest::State::PENDING) {
+        IncrementPendingRequestCount();
+      } else if (new_state == InferenceRequest::State::RELEASED) {
+        // No-op when moving from initialized to released, just releasing early.
+      } else {
        return generate_error();
      }
-      state_ = new_state;
-      IncrementPendingRequestCount();
      break;
    }
-    case InferenceRequest::State::STARTED: {
-      if (new_state != InferenceRequest::State::EXECUTING) {
+    case InferenceRequest::State::PENDING: {
+      // Request may move from pending to either execution when scheduled to
+      // backend, or released early due to some error.
+      if (new_state == InferenceRequest::State::EXECUTING ||
+          new_state == InferenceRequest::State::RELEASED) {
+        DecrementPendingRequestCount();
+      } else {
+        // Unexpected state transition
        return generate_error();
      }
-      state_ = new_state;
-      DecrementPendingRequestCount();
      break;
    }
    case InferenceRequest::State::EXECUTING: {
      if (new_state != InferenceRequest::State::RELEASED) {
        return generate_error();
      }
-      state_ = new_state;
      break;
    }
    case InferenceRequest::State::RELEASED: {
-      // No state transition currently supported after release.
-      return generate_error();
+      if (new_state != InferenceRequest::State::INITIALIZED) {
+        // Only transition currently supported after release is to start over
+        // again, such as re-using request objects for multiple inferences.
+        return generate_error();
+      }
+      break;
    }
  }
+  state_ = new_state;
  return Status::Success;
 }
 
 void
 InferenceRequest::IncrementPendingRequestCount()
 {
 #ifdef TRITON_ENABLE_METRICS
+  // Pending request count should always be 0 or 1 per-request. If a request
+  // increments the count, it should not be incremented again until decremented.
   auto reporter = model_raw_->MetricReporter();
   if (reporter) {
     reporter->IncrementGauge(kPendingRequestMetric, 1);
-    decrement_pending_count_ = true;
   }
 #endif  // TRITON_ENABLE_METRICS
 }
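Taken together, the `SetState()` cases above define a small per-request state machine: INITIALIZED may move to PENDING (incrementing the pending-request gauge) or be released early; PENDING may move to EXECUTING or RELEASED (decrementing the gauge either way); EXECUTING may only move to RELEASED; and RELEASED may only return to INITIALIZED when a request object is re-used. The standalone sketch below is not Triton code — the enum and `IsValidTransition` helper are illustrative names — it simply restates those transition rules:

```cpp
#include <iostream>

// Request lifecycle states, mirroring InferenceRequest::State in the diff.
enum class State { INITIALIZED, PENDING, EXECUTING, RELEASED };

// Allowed transitions, as encoded by the switch in SetState() above.
bool IsValidTransition(State from, State to)
{
  switch (from) {
    case State::INITIALIZED:  // enqueue (gauge +1) or release early
      return to == State::PENDING || to == State::RELEASED;
    case State::PENDING:      // scheduled to a backend, or released on error (gauge -1)
      return to == State::EXECUTING || to == State::RELEASED;
    case State::EXECUTING:    // only forward to released
      return to == State::RELEASED;
    case State::RELEASED:     // request object re-used for another inference
      return to == State::INITIALIZED;
  }
  return false;
}

int main()
{
  std::cout << IsValidTransition(State::INITIALIZED, State::PENDING)    // 1
            << IsValidTransition(State::PENDING, State::EXECUTING)      // 1
            << IsValidTransition(State::EXECUTING, State::INITIALIZED)  // 0
            << std::endl;
}
```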
@@ -194,13 +189,11 @@ void
 InferenceRequest::DecrementPendingRequestCount()
 {
 #ifdef TRITON_ENABLE_METRICS
-  // Only decrement if count has been incremented, and not already decremented.
-  if (decrement_pending_count_) {
-    auto reporter = model_raw_->MetricReporter();
-    if (reporter) {
-      reporter->DecrementGauge(kPendingRequestMetric, 1);
-    }
-    decrement_pending_count_ = false;
+  // Pending request count should always be 0 or 1 per-request. A request should
+  // not decrement the count unless it has already been incremented.
+  auto reporter = model_raw_->MetricReporter();
+  if (reporter) {
+    reporter->DecrementGauge(kPendingRequestMetric, 1);
   }
 #endif  // TRITON_ENABLE_METRICS
 }
@@ -376,7 +369,7 @@ InferenceRequest::OutputBufferProperties(
 Status
 InferenceRequest::Run(std::unique_ptr<InferenceRequest>& request)
 {
-  RETURN_IF_ERROR(request->SetState(InferenceRequest::State::STARTED));
+  RETURN_IF_ERROR(request->SetState(InferenceRequest::State::PENDING));
   return request->model_raw_->Enqueue(request);
 }
 
@@ -849,8 +842,10 @@ InferenceRequest::PrepareForInference()
   request_start_ns_ = 0;
 #endif  // TRITON_ENABLE_STATS
 
-  LOG_VERBOSE(1) << LogRequest() << "prepared: " << *this;
+  // Help enforce that PrepareForInference() is called prior to Run().
+  RETURN_IF_ERROR(SetState(InferenceRequest::State::INITIALIZED));
 
+  LOG_VERBOSE(1) << LogRequest() << "prepared: " << *this;
   return Status::Success;
 }
 
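With PrepareForInference() pinning the request back to INITIALIZED and Run() moving it to PENDING, the intended per-inference ordering is prepare, run, execute, release, then prepare again when the object is re-used. The toy sketch below is hypothetical — `ToyRequest` is not the Triton API — and only illustrates the kind of ordering guarantee this change enables:

```cpp
#include <iostream>
#include <stdexcept>

// Toy illustration of the ordering the diff helps enforce: Run() is only
// valid on a request that PrepareForInference() has (re)initialized.
class ToyRequest {
 public:
  enum class State { NEW, INITIALIZED, PENDING };

  void PrepareForInference() { state_ = State::INITIALIZED; }

  void Run()
  {
    if (state_ != State::INITIALIZED) {
      throw std::runtime_error("Run() called before PrepareForInference()");
    }
    state_ = State::PENDING;  // a real request would be enqueued here
  }

 private:
  State state_ = State::NEW;
};

int main()
{
  ToyRequest req;
  req.PrepareForInference();  // -> INITIALIZED
  req.Run();                  // -> PENDING, ok
  try {
    req.Run();                // not re-prepared: rejected in this toy model
  }
  catch (const std::exception& e) {
    std::cout << e.what() << std::endl;
  }
}
```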
@@ -1580,8 +1575,8 @@ operator<<(std::ostream& out, const InferenceRequest::State& state)
       out << "INITIALIZED";
       break;
     }
-    case InferenceRequest::State::STARTED: {
-      out << "STARTED";
+    case InferenceRequest::State::PENDING: {
+      out << "PENDING";
       break;
     }
     case InferenceRequest::State::EXECUTING: {