- // Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ // Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  //
  // Redistribution and use in source and binary forms, with or without
  // modification, are permitted provided that the following conditions
@@ -39,8 +39,10 @@ namespace triton { namespace backend { namespace python {
 
 InferResponse::InferResponse(
     const std::vector<std::shared_ptr<PbTensor>>& output_tensors,
-    std::shared_ptr<PbError> error, const bool is_last_response, void* id)
-    : error_(error), is_last_response_(is_last_response), id_(id)
+    std::shared_ptr<PbError> error, std::string parameters,
+    const bool is_last_response, void* id)
+    : error_(error), is_last_response_(is_last_response), id_(id),
+      parameters_(std::move(parameters))
 {
   for (auto& output : output_tensors) {
     if (!output) {
@@ -58,6 +60,12 @@ InferResponse::OutputTensors()
   return output_tensors_;
 }
 
+std::string&
+InferResponse::Parameters()
+{
+  return parameters_;
+}
+
 bool
 InferResponse::HasError()
 {
@@ -106,6 +114,9 @@ InferResponse::SaveToSharedMemory(
       j++;
     }
     response_shm_ptr->id = id_;
+
+    parameters_shm_ = PbString::Create(shm_pool, parameters_);
+    response_shm_ptr->parameters = parameters_shm_->ShmHandle();
   }
 }
 
@@ -143,6 +154,8 @@ InferResponse::LoadFromSharedMemory(
 
   std::shared_ptr<PbError> pb_error;
   std::vector<std::shared_ptr<PbTensor>> output_tensors;
+  std::shared_ptr<PbString> parameters_shm;
+  std::string parameters;
 
   // If the error field is set, do not load output tensors from shared memory.
   if (response_shm_ptr->has_error && response_shm_ptr->is_error_set) {
@@ -154,33 +167,43 @@ InferResponse::LoadFromSharedMemory(
     bi::managed_external_buffer::handle_t* tensor_handle_shm =
         reinterpret_cast<bi::managed_external_buffer::handle_t*>(
             response_shm.data_.get() + sizeof(ResponseShm));
+    {
 #ifdef TRITON_PB_STUB
-    // Need to acquire the GIL to avoid hangs.
-    py::gil_scoped_acquire acquire;
+      // Need to acquire the GIL to avoid hangs.
+      py::gil_scoped_acquire acquire;
 #endif
-    for (size_t idx = 0; idx < requested_output_count; ++idx) {
-      std::shared_ptr<PbTensor> pb_tensor = PbTensor::LoadFromSharedMemory(
-          shm_pool, tensor_handle_shm[idx], open_cuda_handle);
-      output_tensors.emplace_back(std::move(pb_tensor));
+      for (size_t idx = 0; idx < requested_output_count; ++idx) {
+        std::shared_ptr<PbTensor> pb_tensor = PbTensor::LoadFromSharedMemory(
+            shm_pool, tensor_handle_shm[idx], open_cuda_handle);
+        output_tensors.emplace_back(std::move(pb_tensor));
+      }
     }
+
+    parameters_shm = std::move(
+        PbString::LoadFromSharedMemory(shm_pool, response_shm_ptr->parameters));
+    parameters = parameters_shm->String();
   }
 
   return std::unique_ptr<InferResponse>(new InferResponse(
       response_shm, output_tensors, pb_error,
-      response_shm_ptr->is_last_response, response_shm_ptr->id));
+      response_shm_ptr->is_last_response, response_shm_ptr->id, parameters_shm,
+      parameters));
 }
 
 InferResponse::InferResponse(
     AllocatedSharedMemory<char>& response_shm,
     std::vector<std::shared_ptr<PbTensor>>& output_tensors,
-    std::shared_ptr<PbError>& pb_error, const bool is_last_response, void* id)
+    std::shared_ptr<PbError>& pb_error, const bool is_last_response, void* id,
+    std::shared_ptr<PbString>& parameters_shm, std::string& parameters)
 {
   response_shm_ = std::move(response_shm);
   output_tensors_ = std::move(output_tensors);
   error_ = std::move(pb_error);
   shm_handle_ = response_shm_.handle_;
   id_ = id;
   is_last_response_ = is_last_response;
+  parameters_shm_ = std::move(parameters_shm);
+  parameters_ = std::move(parameters);
 }
 
 std::shared_ptr<PbError>&
@@ -387,6 +410,38 @@ InferResponse::Send(
     cuda_copy |= cuda_used;
   }
 
+  if (!parameters_.empty()) {
+    triton::common::TritonJson::Value param;
+    THROW_IF_TRITON_ERROR(
+        param.Parse(parameters_.c_str(), parameters_.length()));
+    std::vector<std::string> param_keys;
+    THROW_IF_TRITON_ERROR(param.Members(&param_keys));
+    for (const auto& key : param_keys) {
+      triton::common::TritonJson::Value value;
+      if (!param.Find(key.c_str(), &value)) {
+        throw PythonBackendException("Unexpected missing key on parameters");
+      }
+      if (value.IsString()) {
+        std::string string_value;
+        THROW_IF_TRITON_ERROR(value.AsString(&string_value));
+        THROW_IF_TRITON_ERROR(TRITONBACKEND_ResponseSetStringParameter(
+            response, key.c_str(), string_value.c_str()));
+      } else if (value.IsInt()) {
+        int64_t int_value = 0;
+        THROW_IF_TRITON_ERROR(value.AsInt(&int_value));
+        THROW_IF_TRITON_ERROR(TRITONBACKEND_ResponseSetIntParameter(
+            response, key.c_str(), int_value));
+      } else if (value.IsBool()) {
+        bool bool_value = false;
+        THROW_IF_TRITON_ERROR(value.AsBool(&bool_value));
+        THROW_IF_TRITON_ERROR(TRITONBACKEND_ResponseSetBoolParameter(
+            response, key.c_str(), bool_value));
+      } else {
+        throw PythonBackendException("Unsupported value type on parameters");
+      }
+    }
+  }
+
 #ifdef TRITON_ENABLE_GPU
   if (cuda_copy) {
     cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(cuda_stream));
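
A minimal sketch (not part of the commit above) of the payload shape that InferResponse::Send accepts in parameters_: a flat JSON object whose values are strings, integers, or booleans, since those are the only types forwarded through TRITONBACKEND_ResponseSetStringParameter, TRITONBACKEND_ResponseSetIntParameter, and TRITONBACKEND_ResponseSetBoolParameter; any other value type raises the PythonBackendException seen in the hunk. The key names below are hypothetical examples, not keys defined by this change.

    // Hypothetical parameters string; keys are illustrative only.
    std::string parameters =
        R"({"status": "ok", "batch_index": 3, "cached": false})";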