Skip to content

Commit c090130

Browse files
authored
Add request parameters API (#174)
* Add request parameters API * Fix typos * Fix up * Fix name of InferenceRequestSet* * Rename key->value * Fix build variants
1 parent 24e2d3a commit c090130

File tree

8 files changed

+188
-4
lines changed

8 files changed

+188
-4
lines changed

include/triton/core/tritonbackend.h

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
#include <stddef.h>
2929
#include <stdint.h>
30+
3031
#include "triton/core/tritonserver.h"
3132

3233
#ifdef __cplusplus
@@ -93,7 +94,7 @@ struct TRITONBACKEND_Batcher;
9394
/// }
9495
///
9596
#define TRITONBACKEND_API_VERSION_MAJOR 1
96-
#define TRITONBACKEND_API_VERSION_MINOR 11
97+
#define TRITONBACKEND_API_VERSION_MINOR 12
9798

9899
/// Get the TRITONBACKEND API version supported by Triton. This value
99100
/// can be compared against the TRITONBACKEND_API_VERSION_MAJOR and
@@ -406,6 +407,36 @@ TRITONBACKEND_RequestCorrelationIdString(
406407
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestFlags(
407408
TRITONBACKEND_Request* request, uint32_t* flags);
408409

410+
/// Get the number of parameters specified in the inference request.
411+
///
412+
/// \param request The inference request.
413+
/// \param count Returns the number of parameters.
414+
/// \return a TRITONSERVER_Error indicating success or failure.
415+
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestParameterCount(
416+
TRITONBACKEND_Request* request, uint32_t* count);
417+
418+
/// Get a request parameter by index. The order of parameters in a given
419+
/// request is not necessarily consistent with other requests, even if
420+
/// the requests are in the same batch. As a result, you can not
421+
/// assume that an index obtained from one request will point to the
422+
/// same parameter in a different request.
423+
///
424+
/// The lifetime of the returned parameter object matches that of the
425+
/// request and so the parameter object should not be accessed after the
426+
/// request object is released.
427+
///
428+
/// \param request The inference request.
429+
/// \param index The index of the parameter. Must be 0 <= index <
430+
/// count, where count is the value returned by
431+
/// TRITONBACKEND_RequestParameterCount.
432+
/// \param key Returns the key of the parameter.
433+
/// \param type Returns the type of the parameter.
434+
/// \param vvalue Returns a pointer to the parameter value. The pointed-to
/// data must be interpreted according to the returned 'type'.
435+
/// \return a TRITONSERVER_Error indicating success or failure.
436+
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestParameter(
437+
TRITONBACKEND_Request* request, const uint32_t index, const char** key,
438+
TRITONSERVER_ParameterType* type, const void** vvalue);
439+
409440
/// Get the number of input tensors specified in the request.
410441
///
411442
/// \param request The inference request.
@@ -639,7 +670,7 @@ TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
639670
TRITONBACKEND_ResponseSetIntParameter(
640671
TRITONBACKEND_Response* response, const char* name, const int64_t value);
641672

642-
/// Set an boolean parameter in the response.
673+
/// Set a boolean parameter in the response.
643674
///
644675
/// \param response The response.
645676
/// \param name The name of the parameter.

include/triton/core/tritonserver.h

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ struct TRITONSERVER_MetricFamily;
9191
/// }
9292
///
9393
#define TRITONSERVER_API_VERSION_MAJOR 1
94-
#define TRITONSERVER_API_VERSION_MINOR 20
94+
#define TRITONSERVER_API_VERSION_MINOR 21
9595

9696
/// Get the TRITONSERVER API version supported by the Triton shared
9797
/// library. This value can be compared against the
@@ -1283,6 +1283,36 @@ TRITONSERVER_InferenceRequestSetResponseCallback(
12831283
TRITONSERVER_InferenceResponseCompleteFn_t response_fn,
12841284
void* response_userp);
12851285

1286+
/// Set a string parameter in the request.
1287+
///
1288+
/// \param request The request.
1289+
/// \param key The name of the parameter.
1290+
/// \param value The value of the parameter.
1291+
/// \return a TRITONSERVER_Error indicating success or failure.
1292+
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
1293+
TRITONSERVER_InferenceRequestSetStringParameter(
1294+
TRITONSERVER_InferenceRequest* request, const char* key, const char* value);
1295+
1296+
/// Set an integer parameter in the request.
1297+
///
1298+
/// \param request The request.
1299+
/// \param key The name of the parameter.
1300+
/// \param value The value of the parameter.
1301+
/// \return a TRITONSERVER_Error indicating success or failure.
1302+
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
1303+
TRITONSERVER_InferenceRequestSetIntParameter(
1304+
TRITONSERVER_InferenceRequest* request, const char* key, const int64_t value);
1305+
1306+
/// Set a boolean parameter in the request.
1307+
///
1308+
/// \param request The request.
1309+
/// \param key The name of the parameter.
1310+
/// \param value The value of the parameter.
1311+
/// \return a TRITONSERVER_Error indicating success or failure.
1312+
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
1313+
TRITONSERVER_InferenceRequestSetBoolParameter(
1314+
TRITONSERVER_InferenceRequest* request, const char* key, const bool value);
1315+
12861316
/// TRITONSERVER_InferenceResponse
12871317
///
12881318
/// Object representing an inference response. The inference response

src/backend_model.cc

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "backend_model.h"
2828

2929
#include <vector>
30+
3031
#include "backend_config.h"
3132
#include "backend_model_instance.h"
3233
#include "dynamic_batch_scheduler.h"
@@ -1236,6 +1237,43 @@ TRITONBACKEND_ResponseSend(
12361237
return nullptr; // success
12371238
}
12381239

1240+
// Report, through 'count', how many parameters are attached to the
// inference request. Always succeeds (returns nullptr).
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestParameterCount(
    TRITONBACKEND_Request* request, uint32_t* count)
{
  // The opaque backend handle is an InferenceRequest underneath.
  auto* const inference_request = reinterpret_cast<InferenceRequest*>(request);
  *count = static_cast<uint32_t>(inference_request->Parameters().size());
  return nullptr;  // Success
}
1251+
1252+
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
1253+
TRITONBACKEND_RequestParameter(
1254+
TRITONBACKEND_Request* request, const uint32_t index, const char** key,
1255+
TRITONSERVER_ParameterType* type, const void** vvalue)
1256+
{
1257+
InferenceRequest* lrequest = reinterpret_cast<InferenceRequest*>(request);
1258+
1259+
const auto& parameters = lrequest->Parameters();
1260+
if (index >= parameters.size()) {
1261+
return TRITONSERVER_ErrorNew(
1262+
TRITONSERVER_ERROR_INVALID_ARG,
1263+
("out of bounds index " + std::to_string(index) +
1264+
std::string(": request has ") + std::to_string(parameters.size()) +
1265+
" parameters").c_str());
1266+
}
1267+
1268+
const InferenceParameter& param = parameters[index];
1269+
1270+
*key = param.Name().c_str();
1271+
*type = param.Type();
1272+
*vvalue = param.ValuePointer();
1273+
1274+
return nullptr; // Success
1275+
}
1276+
12391277
///
12401278
/// TRITONBACKEND_Input
12411279
///

src/infer_request.cc

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
#include <algorithm>
3030
#include <deque>
31+
3132
#include "model.h"
3233
#include "model_config_utils.h"
3334
#include "server.h"
@@ -130,6 +131,27 @@ InferenceRequest::SetPriority(uint32_t p)
130131
}
131132
}
132133

134+
// Append a string-valued parameter, constructed in place from 'name' and
// 'value', to the request's parameter list.
Status
InferenceRequest::AddParameter(const char* name, const char* value)
{
  // Build the InferenceParameter directly inside the container.
  parameters_.emplace_back(name, value);

  return Status::Success;
}
140+
141+
// Append a 64-bit integer parameter, constructed in place from 'name' and
// 'value', to the request's parameter list.
Status
InferenceRequest::AddParameter(const char* name, const int64_t value)
{
  // Build the InferenceParameter directly inside the container.
  parameters_.emplace_back(name, value);

  return Status::Success;
}
147+
148+
// Append a boolean parameter, constructed in place from 'name' and 'value',
// to the request's parameter list.
Status
InferenceRequest::AddParameter(const char* name, const bool value)
{
  // Build the InferenceParameter directly inside the container.
  parameters_.emplace_back(name, value);

  return Status::Success;
}
154+
133155
#ifdef TRITON_ENABLE_TRACING
134156
Status
135157
InferenceRequest::TraceInputTensors(

src/infer_request.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,16 @@ class InferenceRequest {
339339
TRITONSERVER_InferenceTraceActivity activity, const std::string& msg);
340340
#endif // TRITON_ENABLE_TRACING
341341

342+
// Add a parameter to the request.
343+
Status AddParameter(const char* name, const char* value);
344+
Status AddParameter(const char* name, const int64_t value);
345+
Status AddParameter(const char* name, const bool value);
346+
const std::deque<InferenceParameter>& Parameters() const
347+
{
348+
return parameters_;
349+
}
350+
351+
342352
// The original inputs are the inputs added to the request before
343353
// the inference execution (that is before
344354
// TRITONSERVER_ServerInferAsync is called). Once execution has
@@ -736,6 +746,10 @@ class InferenceRequest {
736746
// Whether the stats of the request should be collected.
737747
bool collect_stats_;
738748

749+
// The parameters of the request. Use a deque so that there is no
750+
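// reallocation: adding a parameter never moves the existing ones, so
// pointers into already-stored parameters stay valid.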
751+
std::deque<InferenceParameter> parameters_;
752+
739753
#ifdef TRITON_ENABLE_STATS
740754
uint64_t request_start_ns_;
741755
InferenceStatsAggregator* secondary_stats_aggregator_ = nullptr;

src/server.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
#include <memory>
3535
#include <utility>
3636
#include <vector>
37-
3837
#include "backend_manager.h"
3938
#include "constants.h"
4039
#include "cuda_utils.h"
@@ -90,6 +89,7 @@ InferenceServer::InferenceServer()
9089
extensions_.push_back("system_shared_memory");
9190
extensions_.push_back("cuda_shared_memory");
9291
extensions_.push_back("binary_tensor_data");
92+
extensions_.push_back("parameters");
9393
#ifdef TRITON_ENABLE_STATS
9494
extensions_.push_back("statistics");
9595
#endif // TRITON_ENABLE_STATS

src/tritonserver.cc

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
#include <string>
2828
#include <vector>
29+
2930
#include "buffer_attributes.h"
3031
#include "cuda_utils.h"
3132
#include "infer_parameter.h"
@@ -1877,6 +1878,34 @@ TRITONSERVER_InferenceRequestSetResponseCallback(
18771878
return nullptr; // Success
18781879
}
18791880

1881+
// Attach a string-valued parameter to the inference request. Returns
// nullptr on success, or the error produced from a failed AddParameter.
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestSetStringParameter(
    TRITONSERVER_InferenceRequest* request, const char* name, const char* value)
{
  // The opaque API handle is a tc::InferenceRequest underneath.
  auto* const inference_request =
      reinterpret_cast<tc::InferenceRequest*>(request);
  RETURN_IF_STATUS_ERROR(inference_request->AddParameter(name, value));
  return nullptr;  // success
}
1889+
1890+
// Attach a 64-bit integer parameter to the inference request. Returns
// nullptr on success, or the error produced from a failed AddParameter.
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestSetIntParameter(
    TRITONSERVER_InferenceRequest* request, const char* name,
    const int64_t value)
{
  // The opaque API handle is a tc::InferenceRequest underneath.
  auto* const inference_request =
      reinterpret_cast<tc::InferenceRequest*>(request);
  RETURN_IF_STATUS_ERROR(inference_request->AddParameter(name, value));
  return nullptr;  // success
}
1899+
1900+
// Attach a boolean parameter to the inference request. Returns nullptr on
// success, or the error produced from a failed AddParameter.
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestSetBoolParameter(
    TRITONSERVER_InferenceRequest* request, const char* name, const bool value)
{
  // The opaque API handle is a tc::InferenceRequest underneath.
  auto* const inference_request =
      reinterpret_cast<tc::InferenceRequest*>(request);
  RETURN_IF_STATUS_ERROR(inference_request->AddParameter(name, value));
  return nullptr;  // success
}
1908+
18801909
//
18811910
// TRITONSERVER_InferenceResponse
18821911
//

src/tritonserver_stub.cc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,26 @@ TRITONBACKEND_RequestRelease()
683683
{
684684
}
685685
// Stub definition of TRITONSERVER_InferenceRequestSetBoolParameter: takes no
// arguments and does nothing.
TRITONAPI_DECLSPEC void
TRITONSERVER_InferenceRequestSetBoolParameter()
{
  // Intentionally empty.
}
689+
// Stub definition of TRITONSERVER_InferenceRequestSetIntParameter: takes no
// arguments and does nothing.
TRITONAPI_DECLSPEC void
TRITONSERVER_InferenceRequestSetIntParameter()
{
  // Intentionally empty.
}
693+
// Stub definition of TRITONSERVER_InferenceRequestSetStringParameter: takes
// no arguments and does nothing.
TRITONAPI_DECLSPEC void
TRITONSERVER_InferenceRequestSetStringParameter()
{
  // Intentionally empty.
}
697+
// Stub definition of TRITONBACKEND_RequestParameter: takes no arguments and
// does nothing.
TRITONAPI_DECLSPEC void
TRITONBACKEND_RequestParameter()
{
  // Intentionally empty.
}
701+
// Stub definition of TRITONBACKEND_RequestParameterCount: takes no arguments
// and does nothing.
TRITONAPI_DECLSPEC void
TRITONBACKEND_RequestParameterCount()
{
  // Intentionally empty.
}
705+
// Stub definition of TRITONBACKEND_ResponseFactoryNew: takes no arguments
// and does nothing.
TRITONAPI_DECLSPEC void
TRITONBACKEND_ResponseFactoryNew()
{
  // Intentionally empty.
}

0 commit comments

Comments
 (0)