Commit 98a37fd

feat: Extend response parameters support to BLS in python backend
1 parent 1ea48a6 commit 98a37fd

File tree

3 files changed: +53 -6 lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
@@ -138,3 +138,5 @@ dmypy.json
 # pytype static type analyzer
 .pytype/
 
+# vscode
+.vscode/settings.json

README.md

Lines changed: 3 additions & 2 deletions
@@ -803,8 +803,9 @@ You can read more about the inference response parameters in the [parameters
 extension](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_parameters.md)
 documentation.
 
-Inference response parameters is currently not supported on BLS inference
-responses received by BLS models.
+Inference response parameters are also supported when using BLS, i.e. when
+using BLS to call another model A, you can access the optional parameters set
+by A in its response.
 
 ## Managing Python Runtime and Libraries
 
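
For context, here is a minimal sketch of how a Python backend model might read these parameters after a BLS call. The model name, tensor names, and the assumption that the received response exposes its parameters as a JSON string via a `parameters()` accessor (mirroring `InferenceRequest.parameters()`) are illustrative only and are not taken from this commit:

```python
import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        responses = []
        for request in requests:
            # Call a composing model via BLS; "model_a" and the tensor
            # names are placeholders for this sketch.
            bls_request = pb_utils.InferenceRequest(
                model_name="model_a",
                requested_output_names=["OUTPUT0"],
                inputs=[
                    pb_utils.Tensor("INPUT0", np.array([1.0], dtype=np.float32))
                ],
            )
            bls_response = bls_request.exec()
            if bls_response.has_error():
                raise pb_utils.TritonModelException(
                    bls_response.error().message())

            # With this change, parameters set by "model_a" on its response
            # are forwarded to the BLS caller. Assumed accessor: parameters()
            # returning a JSON string (empty when no parameters were set).
            params = json.loads(bls_response.parameters() or "{}")
            pb_utils.Logger.log_info(f"BLS response parameters: {params}")

            output = pb_utils.get_output_tensor_by_name(bls_response, "OUTPUT0")
            responses.append(pb_utils.InferenceResponse(output_tensors=[output]))
        return responses
```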

src/request_executor.cc

Lines changed: 48 additions & 4 deletions
@@ -84,6 +84,7 @@ InferResponseComplete(
   std::unique_ptr<InferResponse> infer_response;
   std::vector<std::shared_ptr<PbTensor>> output_tensors;
   std::shared_ptr<PbError> pb_error;
+  std::string parameters_string;
 
   if (response != nullptr) {
     try {
@@ -153,21 +154,64 @@ InferResponseComplete(
     output_tensors.clear();
   }
 
-  // TODO: [DLIS-7864] Pass response parameters from BLS response.
+  try {
+    triton::common::TritonJson::Value parameters_json(
+        triton::common::TritonJson::ValueType::OBJECT);
+    uint32_t parameter_count;
+    THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceResponseParameterCount(
+        response, &parameter_count));
+    for (size_t i = 0; i < parameter_count; i++) {
+      const char* name;
+      TRITONSERVER_ParameterType type;
+      const void* vvalue;
+      THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceResponseParameter(
+          response, i, &name, &type, &vvalue));
+      if (type == TRITONSERVER_PARAMETER_INT) {
+        THROW_IF_TRITON_ERROR(parameters_json.AddInt(
+            name, *(reinterpret_cast<const int64_t*>(vvalue))));
+      } else if (type == TRITONSERVER_PARAMETER_BOOL) {
+        THROW_IF_TRITON_ERROR(parameters_json.AddBool(
+            name, *(reinterpret_cast<const bool*>(vvalue))));
+      } else if (type == TRITONSERVER_PARAMETER_STRING) {
+        std::string string = reinterpret_cast<const char*>(vvalue);
+        THROW_IF_TRITON_ERROR(parameters_json.AddString(name, string));
+      } else if (type == TRITONSERVER_PARAMETER_DOUBLE) {
+        THROW_IF_TRITON_ERROR(parameters_json.AddDouble(
+            name, *(reinterpret_cast<const double*>(vvalue))));
+      } else {
+        throw PythonBackendException(
+            std::string("Unsupported parameter type for parameter '") + name +
+            "'.");
+      }
+    }
+
+    triton::common::TritonJson::WriteBuffer buffer;
+    THROW_IF_TRITON_ERROR(parameters_json.Write(&buffer));
+    parameters_string = buffer.Contents();
+  }
+  catch (const PythonBackendException& pb_exception) {
+    if (response != nullptr) {
+      LOG_IF_ERROR(
+          TRITONSERVER_InferenceResponseDelete(response),
+          "Failed to delete inference response.");
+
+      response = nullptr;
+    }
+    pb_error = std::make_shared<PbError>(pb_exception.what());
+  }
+
   if (!infer_payload->IsDecoupled()) {
     infer_response = std::make_unique<InferResponse>(
-        output_tensors, pb_error, "" /* parameters */,
+        output_tensors, pb_error, parameters_string,
         true /* is_last_response */);
   } else {
     if ((flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) == 0) {
       // Not the last response.
       infer_response = std::make_unique<InferResponse>(
-          output_tensors, pb_error, "" /* parameters */,
+          output_tensors, pb_error, parameters_string,
           false /* is_last_response */, userp /* id */);
     } else {
       // The last response.
       infer_response = std::make_unique<InferResponse>(
-          output_tensors, pb_error, "" /* parameters */,
+          output_tensors, pb_error, parameters_string,
           true /* is_last_response */, userp /* id */);
     }
   }
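
The conversion above serializes all response parameters into a single JSON object keyed by parameter name, mapping TRITONSERVER_PARAMETER_INT, _BOOL, _STRING, and _DOUBLE to the corresponding JSON types. As a small illustration of how such a string decodes on the Python side (the parameter names and values below are made up, not produced by this commit):

```python
import json

# Hypothetical parameters string in the shape produced by the serialization
# above; the keys and values are invented for illustration only.
parameters_string = '{"num_tokens": 42, "finished": true, "reason": "stop", "score": 0.95}'

params = json.loads(parameters_string)
assert isinstance(params["num_tokens"], int)   # TRITONSERVER_PARAMETER_INT
assert isinstance(params["finished"], bool)    # TRITONSERVER_PARAMETER_BOOL
assert isinstance(params["reason"], str)       # TRITONSERVER_PARAMETER_STRING
assert isinstance(params["score"], float)      # TRITONSERVER_PARAMETER_DOUBLE
```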
