5 changes: 3 additions & 2 deletions include/triton/core/tritonserver.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -1041,7 +1041,8 @@ TRITONSERVER_InferenceRequestNew(
struct TRITONSERVER_Server* server, const char* model_name,
const int64_t model_version);

-/// Delete an inference request object.
+/// Delete an inference request object. The request object must be
+/// released before deletion.
///
/// \param inference_request The request object.
/// \return a TRITONSERVER_Error indicating success or failure.
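The amended doc comment tightens the lifecycle contract: a request handed to the server must come back through its release callback before deleting it is legal. A minimal sketch of that contract against the public C API follows; the `server` handle, the model name `"simple"`, version `-1` (latest), and the omitted input/response wiring are illustrative placeholders, and error checking is elided.

```cpp
#include "triton/core/tritonserver.h"

// Sketch only: the request is deleted from the release callback, i.e.
// only after the server has fully released it.
static void
ExampleReleaseFn(
    TRITONSERVER_InferenceRequest* request, const uint32_t flags,
    void* userp)
{
  if (flags & TRITONSERVER_REQUEST_RELEASE_ALL) {
    TRITONSERVER_InferenceRequestDelete(request);
  }
}

static void
ExampleSubmit(TRITONSERVER_Server* server)
{
  TRITONSERVER_InferenceRequest* request = nullptr;
  TRITONSERVER_InferenceRequestNew(&request, server, "simple", -1);
  TRITONSERVER_InferenceRequestSetReleaseCallback(
      request, ExampleReleaseFn, nullptr /* userp */);
  // ... add inputs, set the response callback, then hand the request to
  // TRITONSERVER_ServerInferAsync(server, request, nullptr) ...
}
```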
18 changes: 13 additions & 5 deletions src/infer_request.cc
@@ -112,7 +112,7 @@ InferenceRequest::InferenceRequest(
SetPriority(0);
// Outer-most release callback to ensure a request has been taken, this
// callback won't be invoked, if certain flags are set.
-release_callbacks_.emplace_back(
+release_callbacks_.emplace_back(std::make_pair(
[](std::unique_ptr<InferenceRequest>& request,
const uint32_t flags) -> Status {
if (flags & TRITONSERVER_REQUEST_RELEASE_RESCHEDULE) {
Expand All @@ -123,7 +123,8 @@ InferenceRequest::InferenceRequest(
"configured to handle such a flag.");
}
return Status::Success;
-});
+},
+false));
}

Status
@@ -476,9 +477,16 @@ InferenceRequest::Release(
{
// Invoke the release callbacks added internally before releasing the
// request to user provided callback.
-for (auto it = request->release_callbacks_.rbegin();
-     it != request->release_callbacks_.rend(); it++) {
-  RETURN_IF_ERROR((*it)(request, release_flags));
+
+// Invoke callbacks in reverse order. Evict internal callbacks so that a
+// reused inference request object does not invoke them again.
+auto& release_callbacks = request->release_callbacks_;
+for (int i = static_cast<int>(release_callbacks.size()) - 1; i >= 0; --i) {
+  // Copy the entry, since the erase below would invalidate a reference.
+  auto [release_fn, is_internal] = release_callbacks[i];
+  if (is_internal) {
+    release_callbacks.erase(release_callbacks.begin() + i);
+  }
+  RETURN_IF_ERROR(release_fn(request, release_flags));
if (request == nullptr) {
return Status::Success;
}
@@ -500,6 +508,6 @@ InferenceRequest::Release(
"Failed to set released state");
void* userp = request->release_userp_;
auto& release_fn = request->release_fn_;
release_fn(
reinterpret_cast<TRITONSERVER_InferenceRequest*>(request.release()),
release_flags, userp);
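Taken together, `Release()` now walks the callback list back-to-front and erases the entries flagged as internal, so a rescheduled and re-released request does not re-run one-shot callbacks, while the outer-most guard (registered with `false`) survives for the next cycle. A standalone sketch of that eviction pattern, using plain `std::function` stand-ins rather than the Triton `InternalReleaseFn` signature:

```cpp
#include <functional>
#include <iostream>
#include <utility>
#include <vector>

using Callback = std::function<void(int flags)>;

int
main()
{
  // Pair each callback with an 'is_internal' flag, mirroring the
  // std::pair<InternalReleaseFn, bool> storage in this change.
  std::vector<std::pair<Callback, bool>> callbacks;
  callbacks.emplace_back(
      [](int) { std::cout << "outer-most guard\n"; }, false);
  callbacks.emplace_back(
      [](int) { std::cout << "internal, one-shot\n"; }, true);

  // Invoke in reverse registration order; evict internal entries so a
  // reused request object does not run them twice.
  for (int i = static_cast<int>(callbacks.size()) - 1; i >= 0; --i) {
    auto [fn, is_internal] = callbacks[i];  // copy before erasing
    if (is_internal) {
      callbacks.erase(callbacks.begin() + i);
    }
    fn(0);
  }
  // Only the non-internal guard callback remains for the next cycle.
  std::cout << "remaining: " << callbacks.size() << "\n";  // prints 1
}
```

Copying the pair out before the `erase` matters: erasing first and then calling through a reference into the vector would be undefined behavior.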
7 changes: 4 additions & 3 deletions src/infer_request.h
@@ -548,7 +548,7 @@ class InferenceRequest {
// and they will be invoked in reversed order.
Status AddInternalReleaseCallback(InternalReleaseFn&& callback)
{
-release_callbacks_.emplace_back(std::move(callback));
+release_callbacks_.emplace_back(std::make_pair(std::move(callback), true));
return Status::Success;
}

@@ -832,8 +832,9 @@ class InferenceRequest {
TRITONSERVER_InferenceRequestReleaseFn_t release_fn_;
void* release_userp_;

-// Additional release callbacks invoked before 'release_fn_'.
-std::vector<InternalReleaseFn> release_callbacks_;
+// Additional release callbacks invoked before 'release_fn_'. The boolean
+// is true when the callback is internal and should be evicted after it is
+// invoked once.
+std::vector<std::pair<InternalReleaseFn, bool>> release_callbacks_;

// Delegator to be invoked on sending responses.
std::function<void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>
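On the registration side, the one-argument `AddInternalReleaseCallback` shape is unchanged, so existing callers are unaffected by the pair storage. A hedged sketch of a core-internal call site, where the `ExampleScheduler` wrapper and the callback body are invented for illustration (this assumes the core's `Status`, `RETURN_IF_ERROR`, and `InferenceRequest` declarations are in scope):

```cpp
// Hypothetical caller: the callback is stored with is_internal == true,
// so the reworked Release() loop evicts it after one invocation and a
// reused request object starts from a clean callback list.
Status
ExampleScheduler::Enqueue(std::unique_ptr<InferenceRequest>& request)
{
  RETURN_IF_ERROR(request->AddInternalReleaseCallback(
      [](std::unique_ptr<InferenceRequest>& req,
         const uint32_t flags) -> Status {
        if (flags & TRITONSERVER_REQUEST_RELEASE_RESCHEDULE) {
          // Reset per-iteration state here before the request re-enters
          // the queue; the details are placeholders.
        }
        return Status::Success;
      }));
  return Status::Success;
}
```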