diff --git a/CMakeLists.txt b/CMakeLists.txt index 61870a8..a16b3a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -211,6 +211,7 @@ if(${TRITON_COMMON_ENABLE_GRPC}) TARGETS grpc-health-library grpc-service-library + grpccallback-service-library # grpc-service-py-library EXPORT triton-common-targets diff --git a/protobuf/CMakeLists.txt b/protobuf/CMakeLists.txt index 34f8374..048cb37 100644 --- a/protobuf/CMakeLists.txt +++ b/protobuf/CMakeLists.txt @@ -159,6 +159,58 @@ if(${TRITON_COMMON_ENABLE_GRPC}) ) endif() +# +# GRPC Callback Service +# +if(${TRITON_COMMON_ENABLE_GRPC}) + get_filename_component(grpccallback_proto_abspath "grpccallback_service.proto" ABSOLUTE) + get_filename_component(grpccallback_proto_dir "${grpccallback_proto_abspath}" PATH) + set(GRPCCALLBACK_SRCS "grpccallback_service.grpc.pb.cc") + set(GRPCCALLBACK_HDRS "grpccallback_service.grpc.pb.h") + + add_custom_command( + OUTPUT "${GRPCCALLBACK_SRCS}" "${GRPCCALLBACK_HDRS}" + COMMAND ${_PROTOBUF_PROTOC} + ARGS + --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" + --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" + -I "${grpccallback_proto_dir}" + --plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}" + "grpccallback_service.proto" + DEPENDS "grpccallback_service.proto" proto-library + ) + + add_library( + grpccallback-service-library EXCLUDE_FROM_ALL OBJECT + ${GRPCCALLBACK_SRCS} ${GRPCCALLBACK_HDRS} + ) + + target_include_directories( + grpccallback-service-library + PUBLIC + $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}> + ) + + target_link_libraries( + grpccallback-service-library + PRIVATE + common-compile-settings + ) + + set_target_properties( + grpccallback-service-library + PROPERTIES + POSITION_INDEPENDENT_CODE ON + ) + + install( + FILES + ${CMAKE_CURRENT_BINARY_DIR}/grpccallback_service.grpc.pb.h + DESTINATION include + OPTIONAL + ) +endif() + # # GRPC Health Service # diff --git a/protobuf/grpc_service.proto b/protobuf/grpc_service.proto index 451dd74..963b58c 100644 --- a/protobuf/grpc_service.proto +++ b/protobuf/grpc_service.proto @@ 
-39,45 +39,10 @@ import "model_config.proto"; //@@ service GRPCInferenceService { - //@@ .. cpp:var:: rpc ServerLive(ServerLiveRequest) returns - //@@ (ServerLiveResponse) - //@@ - //@@ Check liveness of the inference server. - //@@ - rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse) {} - - //@@ .. cpp:var:: rpc ServerReady(ServerReadyRequest) returns - //@@ (ServerReadyResponse) - //@@ - //@@ Check readiness of the inference server. - //@@ - rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {} - - //@@ .. cpp:var:: rpc ModelReady(ModelReadyRequest) returns - //@@ (ModelReadyResponse) - //@@ - //@@ Check readiness of a model in the inference server. - //@@ - rpc ModelReady(ModelReadyRequest) returns (ModelReadyResponse) {} - - //@@ .. cpp:var:: rpc ServerMetadata(ServerMetadataRequest) returns - //@@ (ServerMetadataResponse) - //@@ - //@@ Get server metadata. - //@@ - rpc ServerMetadata(ServerMetadataRequest) returns (ServerMetadataResponse) {} - - //@@ .. cpp:var:: rpc ModelMetadata(ModelMetadataRequest) returns - //@@ (ModelMetadataResponse) - //@@ - //@@ Get model metadata. - //@@ - rpc ModelMetadata(ModelMetadataRequest) returns (ModelMetadataResponse) {} - //@@ .. cpp:var:: rpc ModelInfer(ModelInferRequest) returns //@@ (ModelInferResponse) //@@ - //@@ Perform inference using a specific model. + //@@ Perform inference using this specific model. //@@ rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {} @@ -90,1721 +55,4 @@ service GRPCInferenceService returns (stream ModelStreamInferResponse) { } - - //@@ .. cpp:var:: rpc ModelConfig(ModelConfigRequest) returns - //@@ (ModelConfigResponse) - //@@ - //@@ Get model configuration. - //@@ - rpc ModelConfig(ModelConfigRequest) returns (ModelConfigResponse) {} - - //@@ .. cpp:var:: rpc ModelStatistics( - //@@ ModelStatisticsRequest) - //@@ returns (ModelStatisticsResponse) - //@@ - //@@ Get the cumulative inference statistics for a model. 
- //@@ - rpc ModelStatistics(ModelStatisticsRequest) returns (ModelStatisticsResponse) - { - } - - //@@ .. cpp:var:: rpc RepositoryIndex(RepositoryIndexRequest) returns - //@@ (RepositoryIndexResponse) - //@@ - //@@ Get the index of model repository contents. - //@@ - rpc RepositoryIndex(RepositoryIndexRequest) returns (RepositoryIndexResponse) - { - } - - //@@ .. cpp:var:: rpc RepositoryModelLoad(RepositoryModelLoadRequest) returns - //@@ (RepositoryModelLoadResponse) - //@@ - //@@ Load or reload a model from a repository. - //@@ - rpc RepositoryModelLoad(RepositoryModelLoadRequest) - returns (RepositoryModelLoadResponse) - { - } - - //@@ .. cpp:var:: rpc RepositoryModelUnload(RepositoryModelUnloadRequest) - //@@ returns (RepositoryModelUnloadResponse) - //@@ - //@@ Unload a model. - //@@ - rpc RepositoryModelUnload(RepositoryModelUnloadRequest) - returns (RepositoryModelUnloadResponse) - { - } - - //@@ .. cpp:var:: rpc SystemSharedMemoryStatus( - //@@ SystemSharedMemoryStatusRequest) - //@@ returns (SystemSharedMemoryStatusRespose) - //@@ - //@@ Get the status of all registered system-shared-memory regions. - //@@ - rpc SystemSharedMemoryStatus(SystemSharedMemoryStatusRequest) - returns (SystemSharedMemoryStatusResponse) - { - } - - //@@ .. cpp:var:: rpc SystemSharedMemoryRegister( - //@@ SystemSharedMemoryRegisterRequest) - //@@ returns (SystemSharedMemoryRegisterResponse) - //@@ - //@@ Register a system-shared-memory region. - //@@ - rpc SystemSharedMemoryRegister(SystemSharedMemoryRegisterRequest) - returns (SystemSharedMemoryRegisterResponse) - { - } - - //@@ .. cpp:var:: rpc SystemSharedMemoryUnregister( - //@@ SystemSharedMemoryUnregisterRequest) - //@@ returns (SystemSharedMemoryUnregisterResponse) - //@@ - //@@ Unregister a system-shared-memory region. - //@@ - rpc SystemSharedMemoryUnregister(SystemSharedMemoryUnregisterRequest) - returns (SystemSharedMemoryUnregisterResponse) - { - } - - //@@ .. 
cpp:var:: rpc CudaSharedMemoryStatus( - //@@ CudaSharedMemoryStatusRequest) - //@@ returns (CudaSharedMemoryStatusRespose) - //@@ - //@@ Get the status of all registered CUDA-shared-memory regions. - //@@ - rpc CudaSharedMemoryStatus(CudaSharedMemoryStatusRequest) - returns (CudaSharedMemoryStatusResponse) - { - } - - //@@ .. cpp:var:: rpc CudaSharedMemoryRegister( - //@@ CudaSharedMemoryRegisterRequest) - //@@ returns (CudaSharedMemoryRegisterResponse) - //@@ - //@@ Register a CUDA-shared-memory region. - //@@ - rpc CudaSharedMemoryRegister(CudaSharedMemoryRegisterRequest) - returns (CudaSharedMemoryRegisterResponse) - { - } - - //@@ .. cpp:var:: rpc CudaSharedMemoryUnregister( - //@@ CudaSharedMemoryUnregisterRequest) - //@@ returns (CudaSharedMemoryUnregisterResponse) - //@@ - //@@ Unregister a CUDA-shared-memory region. - //@@ - rpc CudaSharedMemoryUnregister(CudaSharedMemoryUnregisterRequest) - returns (CudaSharedMemoryUnregisterResponse) - { - } - - //@@ .. cpp:var:: rpc TraceSetting(TraceSettingRequest) - //@@ returns (TraceSettingResponse) - //@@ - //@@ Update and get the trace setting of the Triton server. - //@@ - rpc TraceSetting(TraceSettingRequest) returns (TraceSettingResponse) {} - - //@@ .. cpp:var:: rpc LogSettings(LogSettingsRequest) - //@@ returns (LogSettingsResponse) - //@@ - //@@ Update and get the log settings of the Triton server. - //@@ - rpc LogSettings(LogSettingsRequest) returns (LogSettingsResponse) {} -} - -//@@ -//@@.. cpp:var:: message ServerLiveRequest -//@@ -//@@ Request message for ServerLive. -//@@ -message ServerLiveRequest {} - -//@@ -//@@.. cpp:var:: message ServerLiveResponse -//@@ -//@@ Response message for ServerLive. -//@@ -message ServerLiveResponse -{ - //@@ - //@@ .. cpp:var:: bool live - //@@ - //@@ True if the inference server is live, false it not live. - //@@ - bool live = 1; -} - -//@@ -//@@.. cpp:var:: message ServerReadyRequest -//@@ -//@@ Request message for ServerReady. 
-//@@ -message ServerReadyRequest {} - -//@@ -//@@.. cpp:var:: message ServerReadyResponse -//@@ -//@@ Response message for ServerReady. -//@@ -message ServerReadyResponse -{ - //@@ - //@@ .. cpp:var:: bool ready - //@@ - //@@ True if the inference server is ready, false it not ready. - //@@ - bool ready = 1; -} - -//@@ -//@@.. cpp:var:: message ModelReadyRequest -//@@ -//@@ Request message for ModelReady. -//@@ -message ModelReadyRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model to check for readiness. - //@@ - string name = 1; - - //@@ .. cpp:var:: string version - //@@ - //@@ The version of the model to check for readiness. If not given the - //@@ server will choose a version based on the model and internal policy. - //@@ - string version = 2; -} - -//@@ -//@@.. cpp:var:: message ModelReadyResponse -//@@ -//@@ Response message for ModelReady. -//@@ -message ModelReadyResponse -{ - //@@ - //@@ .. cpp:var:: bool ready - //@@ - //@@ True if the model is ready, false it not ready. - //@@ - bool ready = 1; -} - -//@@ -//@@.. cpp:var:: message ServerMetadataRequest -//@@ -//@@ Request message for ServerMetadata. -//@@ -message ServerMetadataRequest {} - -//@@ -//@@.. cpp:var:: message ServerMetadataResponse -//@@ -//@@ Response message for ServerMetadata. -//@@ -message ServerMetadataResponse -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The server name. - //@@ - string name = 1; - - //@@ - //@@ .. cpp:var:: string version - //@@ - //@@ The server version. - //@@ - string version = 2; - - //@@ - //@@ .. cpp:var:: string extensions (repeated) - //@@ - //@@ The extensions supported by the server. - //@@ - repeated string extensions = 3; -} - -//@@ -//@@.. cpp:var:: message ModelMetadataRequest -//@@ -//@@ Request message for ModelMetadata. -//@@ -message ModelMetadataRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model. - //@@ - string name = 1; - - //@@ .. 
cpp:var:: string version - //@@ - //@@ The version of the model to check for readiness. If not - //@@ given the server will choose a version based on the - //@@ model and internal policy. - //@@ - string version = 2; -} - -//@@ -//@@.. cpp:var:: message ModelMetadataResponse -//@@ -//@@ Response message for ModelMetadata. -//@@ -message ModelMetadataResponse -{ - //@@ - //@@ .. cpp:var:: message TensorMetadata - //@@ - //@@ Metadata for a tensor. - //@@ - message TensorMetadata - { - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The tensor name. - //@@ - string name = 1; - - //@@ - //@@ .. cpp:var:: string datatype - //@@ - //@@ The tensor data type. - //@@ - string datatype = 2; - - //@@ - //@@ .. cpp:var:: int64 shape (repeated) - //@@ - //@@ The tensor shape. A variable-size dimension is represented - //@@ by a -1 value. - //@@ - repeated int64 shape = 3; - } - - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The model name. - //@@ - string name = 1; - - //@@ - //@@ .. cpp:var:: string versions (repeated) - //@@ - //@@ The versions of the model. - //@@ - repeated string versions = 2; - - //@@ - //@@ .. cpp:var:: string platform - //@@ - //@@ The model's platform. - //@@ - string platform = 3; - - //@@ - //@@ .. cpp:var:: TensorMetadata inputs (repeated) - //@@ - //@@ The model's inputs. - //@@ - repeated TensorMetadata inputs = 4; - - //@@ - //@@ .. cpp:var:: TensorMetadata outputs (repeated) - //@@ - //@@ The model's outputs. - //@@ - repeated TensorMetadata outputs = 5; -} - -//@@ -//@@.. cpp:var:: message InferParameter -//@@ -//@@ An inference parameter value. -//@@ -message InferParameter -{ - //@@ .. cpp:var:: oneof parameter_choice - //@@ - //@@ The parameter value can be a string, an int64, - //@@ an uint64, a double, or a boolean - //@@ - //@@ Note: double and uint64 are currently - //@@ placeholders for future use and - //@@ are not supported for custom parameters - //@@ - oneof parameter_choice - { - //@@ .. 
cpp:var:: bool bool_param - //@@ - //@@ A boolean parameter value. - //@@ - bool bool_param = 1; - - //@@ .. cpp:var:: int64 int64_param - //@@ - //@@ An int64 parameter value. - //@@ - int64 int64_param = 2; - - //@@ .. cpp:var:: string string_param - //@@ - //@@ A string parameter value. - //@@ - string string_param = 3; - - //@@ .. cpp:var:: double double_param - //@@ - //@@ A double parameter value. - //@@ - double double_param = 4; - - //@@ .. cpp:var:: uint64 uint64_param - //@@ - //@@ A uint64 parameter value. - //@@ - //@@ Not supported for custom parameters - //@@ - uint64 uint64_param = 5; - } -} - -//@@ -//@@.. cpp:var:: message InferTensorContents -//@@ -//@@ The data contained in a tensor represented by the repeated type -//@@ that matches the tensor's data type. Protobuf oneof is not used -//@@ because oneofs cannot contain repeated fields. -//@@ -message InferTensorContents -{ - //@@ - //@@ .. cpp:var:: bool bool_contents (repeated) - //@@ - //@@ Representation for BOOL data type. The size must match what is - //@@ expected by the tensor's shape. The contents must be the flattened, - //@@ one-dimensional, row-major order of the tensor elements. - //@@ - repeated bool bool_contents = 1; - - //@@ - //@@ .. cpp:var:: int32 int_contents (repeated) - //@@ - //@@ Representation for INT8, INT16, and INT32 data types. The size - //@@ must match what is expected by the tensor's shape. The contents - //@@ must be the flattened, one-dimensional, row-major order of the - //@@ tensor elements. - //@@ - repeated int32 int_contents = 2; - - //@@ - //@@ .. cpp:var:: int64 int64_contents (repeated) - //@@ - //@@ Representation for INT64 data types. The size must match what - //@@ is expected by the tensor's shape. The contents must be the - //@@ flattened, one-dimensional, row-major order of the tensor elements. - //@@ - repeated int64 int64_contents = 3; - - //@@ - //@@ .. 
cpp:var:: uint32 uint_contents (repeated) - //@@ - //@@ Representation for UINT8, UINT16, and UINT32 data types. The size - //@@ must match what is expected by the tensor's shape. The contents - //@@ must be the flattened, one-dimensional, row-major order of the - //@@ tensor elements. - //@@ - repeated uint32 uint_contents = 4; - - //@@ - //@@ .. cpp:var:: uint64 uint64_contents (repeated) - //@@ - //@@ Representation for UINT64 data types. The size must match what - //@@ is expected by the tensor's shape. The contents must be the - //@@ flattened, one-dimensional, row-major order of the tensor elements. - //@@ - repeated uint64 uint64_contents = 5; - - //@@ - //@@ .. cpp:var:: float fp32_contents (repeated) - //@@ - //@@ Representation for FP32 data type. The size must match what is - //@@ expected by the tensor's shape. The contents must be the flattened, - //@@ one-dimensional, row-major order of the tensor elements. - //@@ - repeated float fp32_contents = 6; - - //@@ - //@@ .. cpp:var:: double fp64_contents (repeated) - //@@ - //@@ Representation for FP64 data type. The size must match what is - //@@ expected by the tensor's shape. The contents must be the flattened, - //@@ one-dimensional, row-major order of the tensor elements. - //@@ - repeated double fp64_contents = 7; - - //@@ - //@@ .. cpp:var:: bytes bytes_contents (repeated) - //@@ - //@@ Representation for BYTES data type. The size must match what is - //@@ expected by the tensor's shape. The contents must be the flattened, - //@@ one-dimensional, row-major order of the tensor elements. - //@@ - repeated bytes bytes_contents = 8; -} - -//@@ -//@@.. cpp:var:: message ModelInferRequest -//@@ -//@@ Request message for ModelInfer. -//@@ -message ModelInferRequest -{ - //@@ - //@@ .. cpp:var:: message InferInputTensor - //@@ - //@@ An input tensor for an inference request. - //@@ - message InferInputTensor - { - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The tensor name. 
- //@@ - string name = 1; - - //@@ - //@@ .. cpp:var:: string datatype - //@@ - //@@ The tensor data type. - //@@ - string datatype = 2; - - //@@ - //@@ .. cpp:var:: int64 shape (repeated) - //@@ - //@@ The tensor shape. - //@@ - repeated int64 shape = 3; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Optional inference input tensor parameters. - //@@ - map parameters = 4; - - //@@ .. cpp:var:: InferTensorContents contents - //@@ - //@@ The tensor contents using a data-type format. This field - //@@ must not be specified if tensor contents are being specified - //@@ in ModelInferRequest.raw_input_contents. - //@@ - InferTensorContents contents = 5; - } - - //@@ - //@@ .. cpp:var:: message InferRequestedOutputTensor - //@@ - //@@ An output tensor requested for an inference request. - //@@ - message InferRequestedOutputTensor - { - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The tensor name. - //@@ - string name = 1; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Optional requested output tensor parameters. - //@@ - map parameters = 2; - } - - //@@ .. cpp:var:: string model_name - //@@ - //@@ The name of the model to use for inferencing. - //@@ - string model_name = 1; - - //@@ .. cpp:var:: string model_version - //@@ - //@@ The version of the model to use for inference. If not - //@@ given the latest/most-recent version of the model is used. - //@@ - string model_version = 2; - - //@@ .. cpp:var:: string id - //@@ - //@@ Optional identifier for the request. If specified will be - //@@ returned in the response. - //@@ - string id = 3; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Optional inference parameters. - //@@ - map parameters = 4; - - //@@ - //@@ .. cpp:var:: InferInputTensor inputs (repeated) - //@@ - //@@ The input tensors for the inference. - //@@ - repeated InferInputTensor inputs = 5; - - //@@ - //@@ .. cpp:var:: InferRequestedOutputTensor outputs (repeated) - //@@ - //@@ The requested output tensors for the inference. 
Optional, if not - //@@ specified all outputs specified in the model config will be - //@@ returned. - //@@ - repeated InferRequestedOutputTensor outputs = 6; - - //@@ - //@@ .. cpp:var:: bytes raw_input_contents - //@@ - //@@ The data contained in an input tensor can be represented in - //@@ "raw" bytes form or in the repeated type that matches the - //@@ tensor's data type. Using the "raw" bytes form will - //@@ typically allow higher performance due to the way protobuf - //@@ allocation and reuse interacts with GRPC. For example, see - //@@ https://github.com/grpc/grpc/issues/23231. - //@@ - //@@ To use the raw representation 'raw_input_contents' must be - //@@ initialized with data for each tensor in the same order as - //@@ 'inputs'. For each tensor, the size of this content must - //@@ match what is expected by the tensor's shape and data - //@@ type. The raw data must be the flattened, one-dimensional, - //@@ row-major order of the tensor elements without any stride - //@@ or padding between the elements. Note that the FP16 and BF16 data - //@@ types must be represented as raw content as there is no - //@@ specific data type for a 16-bit float type. - //@@ - //@@ If this field is specified then InferInputTensor::contents - //@@ must not be specified for any input tensor. - //@@ - repeated bytes raw_input_contents = 7; -} - -//@@ -//@@.. cpp:var:: message ModelInferResponse -//@@ -//@@ Response message for ModelInfer. -//@@ -message ModelInferResponse -{ - //@@ - //@@ .. cpp:var:: message InferOutputTensor - //@@ - //@@ An output tensor returned for an inference request. - //@@ - message InferOutputTensor - { - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The tensor name. - //@@ - string name = 1; - - //@@ - //@@ .. cpp:var:: string datatype - //@@ - //@@ The tensor data type. - //@@ - string datatype = 2; - - //@@ - //@@ .. cpp:var:: int64 shape (repeated) - //@@ - //@@ The tensor shape. - //@@ - repeated int64 shape = 3; - - //@@ .. 
cpp:var:: map parameters - //@@ - //@@ Optional output tensor parameters. - //@@ - map parameters = 4; - - //@@ .. cpp:var:: InferTensorContents contents - //@@ - //@@ The tensor contents using a data-type format. This field - //@@ must not be specified if tensor contents are being specified - //@@ in ModelInferResponse.raw_output_contents. - //@@ - InferTensorContents contents = 5; - } - - //@@ .. cpp:var:: string model_name - //@@ - //@@ The name of the model used for inference. - //@@ - string model_name = 1; - - //@@ .. cpp:var:: string model_version - //@@ - //@@ The version of the model used for inference. - //@@ - string model_version = 2; - - //@@ .. cpp:var:: string id - //@@ - //@@ The id of the inference request if one was specified. - //@@ - string id = 3; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Optional inference response parameters. - //@@ - map parameters = 4; - - //@@ - //@@ .. cpp:var:: InferOutputTensor outputs (repeated) - //@@ - //@@ The output tensors holding inference results. - //@@ - repeated InferOutputTensor outputs = 5; - - //@@ - //@@ .. cpp:var:: bytes raw_output_contents - //@@ - //@@ The data contained in an output tensor can be represented in - //@@ "raw" bytes form or in the repeated type that matches the - //@@ tensor's data type. Using the "raw" bytes form will - //@@ typically allow higher performance due to the way protobuf - //@@ allocation and reuse interacts with GRPC. For example, see - //@@ https://github.com/grpc/grpc/issues/23231. - //@@ - //@@ To use the raw representation 'raw_output_contents' must be - //@@ initialized with data for each tensor in the same order as - //@@ 'outputs'. For each tensor, the size of this content must - //@@ match what is expected by the tensor's shape and data - //@@ type. The raw data must be the flattened, one-dimensional, - //@@ row-major order of the tensor elements without any stride - //@@ or padding between the elements. 
Note that the FP16 and BF16 data - //@@ types must be represented as raw content as there is no - //@@ specific data type for a 16-bit float type. - //@@ - //@@ If this field is specified then InferOutputTensor::contents - //@@ must not be specified for any output tensor. - //@@ - repeated bytes raw_output_contents = 6; -} - -//@@ -//@@.. cpp:var:: message ModelStreamInferResponse -//@@ -//@@ Response message for ModelStreamInfer. -//@@ -message ModelStreamInferResponse -{ - //@@ - //@@ .. cpp:var:: string error_message - //@@ - //@@ The message describing the error. The empty message - //@@ indicates the inference was successful without errors. - //@@ - string error_message = 1; - - //@@ - //@@ .. cpp:var:: ModelInferResponse infer_response - //@@ - //@@ Holds the results of the request. - //@@ - ModelInferResponse infer_response = 2; -} - -//@@ -//@@.. cpp:var:: message ModelConfigRequest -//@@ -//@@ Request message for ModelConfig. -//@@ -message ModelConfigRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model. - //@@ - string name = 1; - - //@@ .. cpp:var:: string version - //@@ - //@@ The version of the model. If not given the model version - //@@ is selected automatically based on the version policy. - //@@ - string version = 2; -} - -//@@ -//@@.. cpp:var:: message ModelConfigResponse -//@@ -//@@ Response message for ModelConfig. -//@@ -message ModelConfigResponse -{ - //@@ - //@@ .. cpp:var:: ModelConfig config - //@@ - //@@ The model configuration. - //@@ - ModelConfig config = 1; -} - -//@@ -//@@.. cpp:var:: message ModelStatisticsRequest -//@@ -//@@ Request message for ModelStatistics. -//@@ -message ModelStatisticsRequest -{ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model. If not given returns statistics for - //@@ all models. - //@@ - string name = 1; - - //@@ .. cpp:var:: string version - //@@ - //@@ The version of the model. If not given returns statistics for - //@@ all model versions. 
- //@@ - string version = 2; -} - - -//@@ -//@@.. cpp:var:: message StatisticDuration -//@@ -//@@ Statistic recording a cumulative duration metric. -//@@ -message StatisticDuration -{ - //@@ .. cpp:var:: uint64 count - //@@ - //@@ Cumulative number of times this metric occurred. - //@@ - uint64 count = 1; - - //@@ .. cpp:var:: uint64 total_time_ns - //@@ - //@@ Total collected duration of this metric in nanoseconds. - //@@ - uint64 ns = 2; -} - -//@@ -//@@.. cpp:var:: message InferStatistics -//@@ -//@@ Inference statistics. -//@@ -message InferStatistics -{ - //@@ .. cpp:var:: StatisticDuration success - //@@ - //@@ Cumulative count and duration for successful inference - //@@ request. The "success" count and cumulative duration includes - //@@ cache hits. - //@@ - StatisticDuration success = 1; - - //@@ .. cpp:var:: StatisticDuration fail - //@@ - //@@ Cumulative count and duration for failed inference - //@@ request. - //@@ - StatisticDuration fail = 2; - - //@@ .. cpp:var:: StatisticDuration queue - //@@ - //@@ The count and cumulative duration that inference requests wait in - //@@ scheduling or other queues. The "queue" count and cumulative - //@@ duration includes cache hits. - //@@ - StatisticDuration queue = 3; - - //@@ .. cpp:var:: StatisticDuration compute_input - //@@ - //@@ The count and cumulative duration to prepare input tensor data as - //@@ required by the model framework / backend. For example, this duration - //@@ should include the time to copy input tensor data to the GPU. - //@@ The "compute_input" count and cumulative duration do not account for - //@@ requests that were a cache hit. See the "cache_hit" field for more - //@@ info. - //@@ - StatisticDuration compute_input = 4; - - //@@ .. cpp:var:: StatisticDuration compute_infer - //@@ - //@@ The count and cumulative duration to execute the model. - //@@ The "compute_infer" count and cumulative duration do not account for - //@@ requests that were a cache hit. 
See the "cache_hit" field for more - //@@ info. - //@@ - StatisticDuration compute_infer = 5; - - //@@ .. cpp:var:: StatisticDuration compute_output - //@@ - //@@ The count and cumulative duration to extract output tensor data - //@@ produced by the model framework / backend. For example, this duration - //@@ should include the time to copy output tensor data from the GPU. - //@@ The "compute_output" count and cumulative duration do not account for - //@@ requests that were a cache hit. See the "cache_hit" field for more - //@@ info. - //@@ - StatisticDuration compute_output = 6; - - //@@ .. cpp:var:: StatisticDuration cache_hit - //@@ - //@@ The count of response cache hits and cumulative duration to lookup - //@@ and extract output tensor data from the Response Cache on a cache - //@@ hit. For example, this duration should include the time to copy - //@@ output tensor data from the Response Cache to the response object. - //@@ On cache hits, triton does not need to go to the model/backend - //@@ for the output tensor data, so the "compute_input", "compute_infer", - //@@ and "compute_output" fields are not updated. Assuming the response - //@@ cache is enabled for a given model, a cache hit occurs for a - //@@ request to that model when the request metadata (model name, - //@@ model version, model inputs) hashes to an existing entry in the - //@@ cache. On a cache miss, the request hash and response output tensor - //@@ data is added to the cache. See response cache docs for more info: - //@@ - // https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md - //@@ - StatisticDuration cache_hit = 7; - - //@@ .. cpp:var:: StatisticDuration cache_miss - //@@ - //@@ The count of response cache misses and cumulative duration to lookup - //@@ and insert output tensor data from the computed response to the - // cache. 
- //@@ For example, this duration should include the time to copy - //@@ output tensor data from the response object to the Response Cache. - //@@ Assuming the response cache is enabled for a given model, a cache - //@@ miss occurs for a request to that model when the request metadata - //@@ does NOT hash to an existing entry in the cache. See the response - //@@ cache docs for more info: - //@@ - // https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md - //@@ - StatisticDuration cache_miss = 8; -} - -//@@ -//@@.. cpp:var:: message InferResponseStatistics -//@@ -//@@ Statistics per response. -//@@ -message InferResponseStatistics -{ - //@@ .. cpp:var:: StatisticDuration compute_infer - //@@ - //@@ The count and cumulative duration to compute a response. - //@@ - StatisticDuration compute_infer = 1; - - //@@ .. cpp:var:: StatisticDuration compute_output - //@@ - //@@ The count and cumulative duration to extract the output tensors of a - //@@ response. - //@@ - StatisticDuration compute_output = 2; - - //@@ .. cpp:var:: StatisticDuration success - //@@ - //@@ The count and cumulative duration for successful responses. - //@@ - StatisticDuration success = 3; - - //@@ .. cpp:var:: StatisticDuration fail - //@@ - //@@ The count and cumulative duration for failed responses. - //@@ - StatisticDuration fail = 4; - - //@@ .. cpp:var:: StatisticDuration empty_response - //@@ - //@@ The count and cumulative duration for empty responses. - //@@ - StatisticDuration empty_response = 5; - - //@@ .. cpp:var:: StatisticDuration cancel - //@@ - //@@ The count and cumulative duration, for cleaning up resources held by - //@@ a cancelled request, for cancelled responses. - //@@ - StatisticDuration cancel = 6; -} - -//@@ -//@@.. cpp:var:: message InferBatchStatistics -//@@ -//@@ Inference batch statistics. -//@@ -message InferBatchStatistics -{ - //@@ .. cpp:var:: uint64 batch_size - //@@ - //@@ The size of the batch. 
- //@@ - uint64 batch_size = 1; - - //@@ .. cpp:var:: StatisticDuration compute_input - //@@ - //@@ The count and cumulative duration to prepare input tensor data as - //@@ required by the model framework / backend with the given batch size. - //@@ For example, this duration should include the time to copy input - //@@ tensor data to the GPU. - //@@ - StatisticDuration compute_input = 2; - - //@@ .. cpp:var:: StatisticDuration compute_infer - //@@ - //@@ The count and cumulative duration to execute the model with the given - //@@ batch size. - //@@ - StatisticDuration compute_infer = 3; - - //@@ .. cpp:var:: StatisticDuration compute_output - //@@ - //@@ The count and cumulative duration to extract output tensor data - //@@ produced by the model framework / backend with the given batch size. - //@@ For example, this duration should include the time to copy output - //@@ tensor data from the GPU. - //@@ - StatisticDuration compute_output = 4; -} - -//@@ -//@@.. cpp:var:: message MemoryUsage -//@@ -//@@ Memory usage. -//@@ -message MemoryUsage -{ - //@@ .. cpp:var:: string type - //@@ - //@@ The type of memory, the value can be "CPU", "CPU_PINNED", "GPU". - //@@ - string type = 1; - - //@@ .. cpp:var:: int64 id - //@@ - //@@ The id of the memory, typically used with "type" to identify - //@@ a device that hosts the memory. - //@@ - int64 id = 2; - - //@@ .. cpp:var:: uint64 byte_size - //@@ - //@@ The byte size of the memory. - //@@ - uint64 byte_size = 3; -} - -//@@ -//@@.. cpp:var:: message ModelStatistics -//@@ -//@@ Statistics for a specific model and version. -//@@ -message ModelStatistics -{ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model. If not given returns statistics for all - //@@ - string name = 1; - - //@@ .. cpp:var:: string version - //@@ - //@@ The version of the model. - //@@ - string version = 2; - - //@@ .. 
cpp:var:: uint64 last_inference - //@@ - //@@ The timestamp of the last inference request made for this model, - //@@ as milliseconds since the epoch. - //@@ - uint64 last_inference = 3; - - //@@ .. cpp:var:: uint64 last_inference - //@@ - //@@ The cumulative count of successful inference requests made for this - //@@ model. Each inference in a batched request is counted as an - //@@ individual inference. For example, if a client sends a single - //@@ inference request with batch size 64, "inference_count" will be - //@@ incremented by 64. Similarly, if a clients sends 64 individual - //@@ requests each with batch size 1, "inference_count" will be - //@@ incremented by 64. The "inference_count" value DOES NOT include - //@@ cache hits. - //@@ - uint64 inference_count = 4; - - //@@ .. cpp:var:: uint64 last_inference - //@@ - //@@ The cumulative count of the number of successful inference executions - //@@ performed for the model. When dynamic batching is enabled, a single - //@@ model execution can perform inferencing for more than one inference - //@@ request. For example, if a clients sends 64 individual requests each - //@@ with batch size 1 and the dynamic batcher batches them into a single - //@@ large batch for model execution then "execution_count" will be - //@@ incremented by 1. If, on the other hand, the dynamic batcher is not - //@@ enabled for that each of the 64 individual requests is executed - //@@ independently, then "execution_count" will be incremented by 64. - //@@ The "execution_count" value DOES NOT include cache hits. - //@@ - uint64 execution_count = 5; - - //@@ .. cpp:var:: InferStatistics inference_stats - //@@ - //@@ The aggregate statistics for the model/version. - //@@ - InferStatistics inference_stats = 6; - - //@@ .. cpp:var:: InferBatchStatistics batch_stats (repeated) - //@@ - //@@ The aggregate statistics for each different batch size that is - //@@ executed in the model. 
The batch statistics indicate how many actual - //@@ model executions were performed and show differences due to different - //@@ batch size (for example, larger batches typically take longer to - //@@ compute). - //@@ - repeated InferBatchStatistics batch_stats = 7; - - //@@ .. cpp:var:: MemoryUsage memory_usage (repeated) - //@@ - //@@ The memory usage detected during model loading, which may be used to - //@@ estimate the memory to be released once the model is unloaded. Note - //@@ that the estimation is inferenced by the profiling tools and - //@@ framework's memory schema, therefore it is advised to perform - //@@ experiments to understand the scenario that the reported memory usage - //@@ can be relied on. As a starting point, the GPU memory usage for - //@@ models in ONNX Runtime backend and TensorRT backend is usually - //@@ aligned. - //@@ - repeated MemoryUsage memory_usage = 8; - - //@@ .. cpp:var:: map response_stats - //@@ - //@@ The key and value pairs for all responses statistics. The key is a - //@@ string identifying a set of response statistics aggregated together - //@@ (i.e. index of the response sent). The value is the aggregated - //@@ response statistics. - //@@ - map response_stats = 9; -} - -//@@ -//@@.. cpp:var:: message ModelStatisticsResponse -//@@ -//@@ Response message for ModelStatistics. -//@@ -message ModelStatisticsResponse -{ - //@@ .. cpp:var:: ModelStatistics model_stats (repeated) - //@@ - //@@ Statistics for each requested model. - //@@ - repeated ModelStatistics model_stats = 1; -} - -//@@ -//@@.. cpp:var:: message ModelRepositoryParameter -//@@ -//@@ An model repository parameter value. -//@@ -message ModelRepositoryParameter -{ - //@@ .. cpp:var:: oneof parameter_choice - //@@ - //@@ The parameter value can be a string, an int64 or - //@@ a boolean - //@@ - oneof parameter_choice - { - //@@ .. cpp:var:: bool bool_param - //@@ - //@@ A boolean parameter value. - //@@ - bool bool_param = 1; - - //@@ .. 
cpp:var:: int64 int64_param - //@@ - //@@ An int64 parameter value. - //@@ - int64 int64_param = 2; - - //@@ .. cpp:var:: string string_param - //@@ - //@@ A string parameter value. - //@@ - string string_param = 3; - - //@@ .. cpp:var:: bytes bytes_param - //@@ - //@@ A bytes parameter value. - //@@ - bytes bytes_param = 4; - } -} - -//@@ -//@@.. cpp:var:: message RepositoryIndexRequest -//@@ -//@@ Request message for RepositoryIndex. -//@@ -message RepositoryIndexRequest -{ - //@@ .. cpp:var:: string repository_name - //@@ - //@@ The name of the repository. If empty the index is returned - //@@ for all repositories. - //@@ - string repository_name = 1; - - //@@ .. cpp:var:: bool ready - //@@ - //@@ If true returned only models currently ready for inferencing. - //@@ - bool ready = 2; -} - -//@@ -//@@.. cpp:var:: message RepositoryIndexResponse -//@@ -//@@ Response message for RepositoryIndex. -//@@ -message RepositoryIndexResponse -{ - //@@ - //@@ .. cpp:var:: message ModelIndex - //@@ - //@@ Index entry for a model. - //@@ - message ModelIndex - { - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model. - //@@ - string name = 1; - - //@@ .. cpp:var:: string version - //@@ - //@@ The version of the model. - //@@ - string version = 2; - - //@@ - //@@ .. cpp:var:: string state - //@@ - //@@ The state of the model. - //@@ - string state = 3; - - //@@ - //@@ .. cpp:var:: string reason - //@@ - //@@ The reason, if any, that the model is in the given state. - //@@ - string reason = 4; - } - - //@@ - //@@ .. cpp:var:: ModelIndex models (repeated) - //@@ - //@@ An index entry for each model. - //@@ - repeated ModelIndex models = 1; -} - -//@@ -//@@.. cpp:var:: message RepositoryModelLoadRequest -//@@ -//@@ Request message for RepositoryModelLoad. -//@@ -message RepositoryModelLoadRequest -{ - //@@ .. cpp:var:: string repository_name - //@@ - //@@ The name of the repository to load from. If empty the model - //@@ is loaded from any repository. 
- //@@ - string repository_name = 1; - - //@@ .. cpp:var:: string repository_name - //@@ - //@@ The name of the model to load, or reload. - //@@ - string model_name = 2; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Optional model repository request parameters. - //@@ - map parameters = 3; -} - -//@@ -//@@.. cpp:var:: message RepositoryModelLoadResponse -//@@ -//@@ Response message for RepositoryModelLoad. -//@@ -message RepositoryModelLoadResponse {} - -//@@ -//@@.. cpp:var:: message RepositoryModelUnloadRequest -//@@ -//@@ Request message for RepositoryModelUnload. -//@@ -message RepositoryModelUnloadRequest -{ - //@@ .. cpp:var:: string repository_name - //@@ - //@@ The name of the repository from which the model was originally - //@@ loaded. If empty the repository is not considered. - //@@ - string repository_name = 1; - - //@@ .. cpp:var:: string repository_name - //@@ - //@@ The name of the model to unload. - //@@ - string model_name = 2; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Optional model repository request parameters. - //@@ - map parameters = 3; -} - -//@@ -//@@.. cpp:var:: message RepositoryModelUnloadResponse -//@@ -//@@ Response message for RepositoryModelUnload. -//@@ -message RepositoryModelUnloadResponse {} - -//@@ -//@@.. cpp:var:: message SystemSharedMemoryStatusRequest -//@@ -//@@ Request message for SystemSharedMemoryStatus. -//@@ -message SystemSharedMemoryStatusRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the region to get status for. If empty the - //@@ status is returned for all registered regions. - //@@ - string name = 1; -} - -//@@ -//@@.. cpp:var:: message SystemSharedMemoryStatusResponse -//@@ -//@@ Response message for SystemSharedMemoryStatus. -//@@ -message SystemSharedMemoryStatusResponse -{ - //@@ - //@@ .. cpp:var:: message RegionStatus - //@@ - //@@ Status for a shared memory region. - //@@ - message RegionStatus - { - //@@ - //@@ .. 
cpp:var:: string name - //@@ - //@@ The name for the shared memory region. - //@@ - string name = 1; - - //@@ .. cpp:var:: string shared_memory_key - //@@ - //@@ The key of the underlying memory object that contains the - //@@ shared memory region. - //@@ - string key = 2; - - //@@ .. cpp:var:: uint64 offset - //@@ - //@@ Offset, in bytes, within the underlying memory object to - //@@ the start of the shared memory region. - //@@ - uint64 offset = 3; - - //@@ .. cpp:var:: uint64 byte_size - //@@ - //@@ Size of the shared memory region, in bytes. - //@@ - uint64 byte_size = 4; - } - - //@@ - //@@ .. cpp:var:: map regions - //@@ - //@@ Status for each of the registered regions, indexed by - //@@ region name. - //@@ - map regions = 1; -} - -//@@ -//@@.. cpp:var:: message SystemSharedMemoryRegisterRequest -//@@ -//@@ Request message for SystemSharedMemoryRegister. -//@@ -message SystemSharedMemoryRegisterRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the region to register. - //@@ - string name = 1; - - //@@ .. cpp:var:: string shared_memory_key - //@@ - //@@ The key of the underlying memory object that contains the - //@@ shared memory region. - //@@ - string key = 2; - - //@@ .. cpp:var:: uint64 offset - //@@ - //@@ Offset, in bytes, within the underlying memory object to - //@@ the start of the shared memory region. - //@@ - uint64 offset = 3; - - //@@ .. cpp:var:: uint64 byte_size - //@@ - //@@ Size of the shared memory region, in bytes. - //@@ - uint64 byte_size = 4; -} - -//@@ -//@@.. cpp:var:: message SystemSharedMemoryRegisterResponse -//@@ -//@@ Response message for SystemSharedMemoryRegister. -//@@ -message SystemSharedMemoryRegisterResponse {} - -//@@ -//@@.. cpp:var:: message SystemSharedMemoryUnregisterRequest -//@@ -//@@ Request message for SystemSharedMemoryUnregister. -//@@ -message SystemSharedMemoryUnregisterRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the system region to unregister. 
If empty - //@@ all system shared-memory regions are unregistered. - //@@ - string name = 1; -} - -//@@ -//@@.. cpp:var:: message SystemSharedMemoryUnregisterResponse -//@@ -//@@ Response message for SystemSharedMemoryUnregister. -//@@ -message SystemSharedMemoryUnregisterResponse {} - -//@@ -//@@.. cpp:var:: message CudaSharedMemoryStatusRequest -//@@ -//@@ Request message for CudaSharedMemoryStatus. -//@@ -message CudaSharedMemoryStatusRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the region to get status for. If empty the - //@@ status is returned for all registered regions. - //@@ - string name = 1; -} - -//@@ -//@@.. cpp:var:: message CudaSharedMemoryStatusResponse -//@@ -//@@ Response message for CudaSharedMemoryStatus. -//@@ -message CudaSharedMemoryStatusResponse -{ - //@@ - //@@ .. cpp:var:: message RegionStatus - //@@ - //@@ Status for a shared memory region. - //@@ - message RegionStatus - { - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name for the shared memory region. - //@@ - string name = 1; - - //@@ .. cpp:var:: uin64 device_id - //@@ - //@@ The GPU device ID where the cudaIPC handle was created. - //@@ - uint64 device_id = 2; - - //@@ .. cpp:var:: uint64 byte_size - //@@ - //@@ Size of the shared memory region, in bytes. - //@@ - uint64 byte_size = 3; - } - - //@@ - //@@ .. cpp:var:: map regions - //@@ - //@@ Status for each of the registered regions, indexed by - //@@ region name. - //@@ - map regions = 1; -} - -//@@ -//@@.. cpp:var:: message CudaSharedMemoryRegisterRequest -//@@ -//@@ Request message for CudaSharedMemoryRegister. -//@@ -message CudaSharedMemoryRegisterRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the region to register. - //@@ - string name = 1; - - //@@ .. cpp:var:: bytes raw_handle - //@@ - //@@ The raw serialized cudaIPC handle. - //@@ - bytes raw_handle = 2; - - //@@ .. 
cpp:var:: int64 device_id - //@@ - //@@ The GPU device ID on which the cudaIPC handle was created. - //@@ - int64 device_id = 3; - - //@@ .. cpp:var:: uint64 byte_size - //@@ - //@@ Size of the shared memory block, in bytes. - //@@ - uint64 byte_size = 4; -} - -//@@ -//@@.. cpp:var:: message CudaSharedMemoryRegisterResponse -//@@ -//@@ Response message for CudaSharedMemoryRegister. -//@@ -message CudaSharedMemoryRegisterResponse {} - -//@@ -//@@.. cpp:var:: message CudaSharedMemoryUnregisterRequest -//@@ -//@@ Request message for CudaSharedMemoryUnregister. -//@@ -message CudaSharedMemoryUnregisterRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the cuda region to unregister. If empty - //@@ all cuda shared-memory regions are unregistered. - //@@ - string name = 1; -} - -//@@ -//@@.. cpp:var:: message CudaSharedMemoryUnregisterResponse -//@@ -//@@ Response message for CudaSharedMemoryUnregister. -//@@ -message CudaSharedMemoryUnregisterResponse {} - -//@@ -//@@.. cpp:var:: message TraceSettingRequest -//@@ -//@@ Request message for TraceSetting. -//@@ -message TraceSettingRequest -{ - //@@ - //@@ .. cpp:var:: message SettingValue - //@@ - //@@ The values to be associated with a trace setting. - //@@ If no value is provided, the setting will be clear and - //@@ the global setting value will be used. - //@@ - message SettingValue - { - //@@ - //@@ .. cpp:var:: string value (repeated) - //@@ - //@@ The value. - //@@ - repeated string value = 1; - } - - //@@ .. cpp:var:: map settings - //@@ - //@@ The new setting values to be updated, - //@@ settings that are not specified will remain unchanged. - //@@ - map settings = 1; - - //@@ - //@@ .. cpp:var:: string model_name - //@@ - //@@ The name of the model to apply the new trace settings. - //@@ If not given, the new settings will be applied globally. - //@@ - string model_name = 2; -} - -//@@ -//@@.. cpp:var:: message TraceSettingResponse -//@@ -//@@ Response message for TraceSetting. 
-//@@ -message TraceSettingResponse -{ - //@@ - //@@ .. cpp:var:: message SettingValue - //@@ - //@@ The values to be associated with a trace setting. - //@@ - message SettingValue - { - //@@ - //@@ .. cpp:var:: string value (repeated) - //@@ - //@@ The value. - //@@ - repeated string value = 1; - } - - //@@ .. cpp:var:: map settings - //@@ - //@@ The current trace settings, including any changes specified - //@@ by TraceSettingRequest. - //@@ - map settings = 1; -} - -//@@ -//@@.. cpp:var:: message LogSettingsRequest -//@@ -//@@ Request message for LogSettings. -//@@ -message LogSettingsRequest -{ - message SettingValue - { - oneof parameter_choice - { - //@@ .. cpp:var:: bool bool_param - //@@ - //@@ A boolean parameter value. - //@@ - bool bool_param = 1; - - //@@ .. cpp:var:: uint32 uint32_param - //@@ - //@@ An uint32 parameter value. - //@@ - uint32 uint32_param = 2; - - //@@ .. cpp:var:: string string_param - //@@ - //@@ A string parameter value. - //@@ - string string_param = 3; - } - } - //@@ .. cpp:var:: map settings - //@@ - //@@ The current log settings. - //@@ - map settings = 1; -} - -//@@ -//@@.. cpp:var:: message LogSettingsResponse -//@@ -//@@ Response message for LogSettings. -//@@ -message LogSettingsResponse -{ - message SettingValue - { - oneof parameter_choice - { - //@@ .. cpp:var:: bool bool_param - //@@ - //@@ A boolean parameter value. - //@@ - bool bool_param = 1; - - //@@ .. cpp:var:: uint32 uint32_param - //@@ - //@@ An int32 parameter value. - //@@ - uint32 uint32_param = 2; - - //@@ .. cpp:var:: string string_param - //@@ - //@@ A string parameter value. - //@@ - string string_param = 3; - } - } - //@@ .. cpp:var:: map settings - //@@ - //@@ The current log settings. 
- //@@ - map settings = 1; } diff --git a/protobuf/grpccallback_service.proto b/protobuf/grpccallback_service.proto new file mode 100644 index 0000000..afbb2de --- /dev/null +++ b/protobuf/grpccallback_service.proto @@ -0,0 +1,209 @@ +// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +syntax = "proto3"; + +package inference; + +//@@.. cpp:namespace:: inference + +import "model_config.proto"; + +//@@ +//@@.. 
cpp:var:: service GRPCInferenceServiceCallback +//@@ +//@@ Inference Server GRPC Callback endpoints. +//@@ +service GRPCInferenceServiceCallback +{ + //@@ .. cpp:var:: rpc ServerLive(ServerLiveRequest) returns + //@@ (ServerLiveResponse) + //@@ + //@@ Check liveness of the inference server. + //@@ + rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse) {} + + //@@ .. cpp:var:: rpc ServerReady(ServerReadyRequest) returns + //@@ (ServerReadyResponse) + //@@ + //@@ Check readiness of the inference server. + //@@ + rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {} + + //@@ .. cpp:var:: rpc ModelReady(ModelReadyRequest) returns + //@@ (ModelReadyResponse) + //@@ + //@@ Check readiness of a model in the inference server. + //@@ + rpc ModelReady(ModelReadyRequest) returns (ModelReadyResponse) {} + + //@@ .. cpp:var:: rpc ServerMetadata(ServerMetadataRequest) returns + //@@ (ServerMetadataResponse) + //@@ + //@@ Get server metadata. + //@@ + rpc ServerMetadata(ServerMetadataRequest) returns (ServerMetadataResponse) {} + + //@@ .. cpp:var:: rpc ModelMetadata(ModelMetadataRequest) returns + //@@ (ModelMetadataResponse) + //@@ + //@@ Get model metadata. + //@@ + rpc ModelMetadata(ModelMetadataRequest) returns (ModelMetadataResponse) {} + + //@@ .. cpp:var:: rpc ModelConfig(ModelConfigRequest) returns + //@@ (ModelConfigResponse) + //@@ + //@@ Get model configuration. + //@@ + rpc ModelConfig(ModelConfigRequest) returns (ModelConfigResponse) {} + + //@@ .. cpp:var:: rpc ModelStatistics( + //@@ ModelStatisticsRequest) + //@@ returns (ModelStatisticsResponse) + //@@ + //@@ Get the cumulative inference statistics for a model. + //@@ + rpc ModelStatistics(ModelStatisticsRequest) returns (ModelStatisticsResponse) + { + } + + //@@ .. cpp:var:: rpc RepositoryIndex(RepositoryIndexRequest) returns + //@@ (RepositoryIndexResponse) + //@@ + //@@ Get the index of model repository contents. 
+ //@@ + rpc RepositoryIndex(RepositoryIndexRequest) returns (RepositoryIndexResponse) + { + } + + //@@ .. cpp:var:: rpc RepositoryModelLoad(RepositoryModelLoadRequest) returns + //@@ (RepositoryModelLoadResponse) + //@@ + //@@ Load or reload a model from a repository. + //@@ + rpc RepositoryModelLoad(RepositoryModelLoadRequest) + returns (RepositoryModelLoadResponse) + { + } + + //@@ .. cpp:var:: rpc RepositoryModelUnload(RepositoryModelUnloadRequest) + //@@ returns (RepositoryModelUnloadResponse) + //@@ + //@@ Unload a model. + //@@ + rpc RepositoryModelUnload(RepositoryModelUnloadRequest) + returns (RepositoryModelUnloadResponse) + { + } + + //@@ .. cpp:var:: rpc SystemSharedMemoryStatus( + //@@ SystemSharedMemoryStatusRequest) + //@@ returns (SystemSharedMemoryStatusResponse) + //@@ + //@@ Get the status of all registered system-shared-memory regions. + //@@ + rpc SystemSharedMemoryStatus(SystemSharedMemoryStatusRequest) + returns (SystemSharedMemoryStatusResponse) + { + } + + //@@ .. cpp:var:: rpc SystemSharedMemoryRegister( + //@@ SystemSharedMemoryRegisterRequest) + //@@ returns (SystemSharedMemoryRegisterResponse) + //@@ + //@@ Register a system-shared-memory region. + //@@ + rpc SystemSharedMemoryRegister(SystemSharedMemoryRegisterRequest) + returns (SystemSharedMemoryRegisterResponse) + { + } + + //@@ .. cpp:var:: rpc SystemSharedMemoryUnregister( + //@@ SystemSharedMemoryUnregisterRequest) + //@@ returns (SystemSharedMemoryUnregisterResponse) + //@@ + //@@ Unregister a system-shared-memory region. + //@@ + rpc SystemSharedMemoryUnregister(SystemSharedMemoryUnregisterRequest) + returns (SystemSharedMemoryUnregisterResponse) + { + } + + //@@ .. cpp:var:: rpc CudaSharedMemoryStatus( + //@@ CudaSharedMemoryStatusRequest) + //@@ returns (CudaSharedMemoryStatusResponse) + //@@ + //@@ Get the status of all registered CUDA-shared-memory regions.
+ //@@ + rpc CudaSharedMemoryStatus(CudaSharedMemoryStatusRequest) + returns (CudaSharedMemoryStatusResponse) + { + } + + //@@ .. cpp:var:: rpc CudaSharedMemoryRegister( + //@@ CudaSharedMemoryRegisterRequest) + //@@ returns (CudaSharedMemoryRegisterResponse) + //@@ + //@@ Register a CUDA-shared-memory region. + //@@ + rpc CudaSharedMemoryRegister(CudaSharedMemoryRegisterRequest) + returns (CudaSharedMemoryRegisterResponse) + { + } + + //@@ .. cpp:var:: rpc CudaSharedMemoryUnregister( + //@@ CudaSharedMemoryUnregisterRequest) + //@@ returns (CudaSharedMemoryUnregisterResponse) + //@@ + //@@ Unregister a CUDA-shared-memory region. + //@@ + rpc CudaSharedMemoryUnregister(CudaSharedMemoryUnregisterRequest) + returns (CudaSharedMemoryUnregisterResponse) + { + } + + //@@ .. cpp:var:: rpc TraceSetting(TraceSettingRequest) + //@@ returns (TraceSettingResponse) + //@@ + //@@ Update and get the trace setting of the Triton server. + //@@ + rpc TraceSetting(TraceSettingRequest) returns (TraceSettingResponse) {} + + //@@ .. cpp:var:: rpc LogSettings(LogSettingsRequest) + //@@ returns (LogSettingsResponse) + //@@ + //@@ Update and get the log settings of the Triton server. + //@@ + rpc LogSettings(LogSettingsRequest) returns (LogSettingsResponse) {} + + //@@ .. cpp:var:: rpc ModelInfer(ModelInferRequest) returns + //@@ (ModelInferResponse) + //@@ + //@@ Perform inference using this specific model. + //@@ + rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {} +} diff --git a/protobuf/model_config.proto b/protobuf/model_config.proto index 166cbb0..b47d8f0 100644 --- a/protobuf/model_config.proto +++ b/protobuf/model_config.proto @@ -2163,3 +2163,1594 @@ message ModelConfig //@@ ModelMetrics model_metrics = 26; } + +//@@ +//@@.. cpp:var:: message ServerLiveRequest +//@@ +//@@ Request message for ServerLive. +//@@ +message ServerLiveRequest {} + +//@@ +//@@.. cpp:var:: message ServerLiveResponse +//@@ +//@@ Response message for ServerLive. 
+//@@ +message ServerLiveResponse +{ + //@@ + //@@ .. cpp:var:: bool live + //@@ + //@@ True if the inference server is live, false if not live. + //@@ + bool live = 1; +} + +//@@ +//@@.. cpp:var:: message ServerReadyRequest +//@@ +//@@ Request message for ServerReady. +//@@ +message ServerReadyRequest {} + +//@@ +//@@.. cpp:var:: message ServerReadyResponse +//@@ +//@@ Response message for ServerReady. +//@@ +message ServerReadyResponse +{ + //@@ + //@@ .. cpp:var:: bool ready + //@@ + //@@ True if the inference server is ready, false if not ready. + //@@ + bool ready = 1; +} + +//@@ +//@@.. cpp:var:: message ModelReadyRequest +//@@ +//@@ Request message for ModelReady. +//@@ +message ModelReadyRequest +{ + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the model to check for readiness. + //@@ + string name = 1; + + //@@ .. cpp:var:: string version + //@@ + //@@ The version of the model to check for readiness. If not given the + //@@ server will choose a version based on the model and internal policy. + //@@ + string version = 2; +} + +//@@ +//@@.. cpp:var:: message ModelReadyResponse +//@@ +//@@ Response message for ModelReady. +//@@ +message ModelReadyResponse +{ + //@@ + //@@ .. cpp:var:: bool ready + //@@ + //@@ True if the model is ready, false if not ready. + //@@ + bool ready = 1; +} + +//@@ +//@@.. cpp:var:: message ServerMetadataRequest +//@@ +//@@ Request message for ServerMetadata. +//@@ +message ServerMetadataRequest {} + +//@@ +//@@.. cpp:var:: message ServerMetadataResponse +//@@ +//@@ Response message for ServerMetadata. +//@@ +message ServerMetadataResponse +{ + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The server name. + //@@ + string name = 1; + + //@@ + //@@ .. cpp:var:: string version + //@@ + //@@ The server version. + //@@ + string version = 2; + + //@@ + //@@ .. cpp:var:: string extensions (repeated) + //@@ + //@@ The extensions supported by the server. + //@@ + repeated string extensions = 3; +} + +//@@ +//@@..
cpp:var:: message ModelMetadataRequest +//@@ +//@@ Request message for ModelMetadata. +//@@ +message ModelMetadataRequest +{ + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the model. + //@@ + string name = 1; + + //@@ .. cpp:var:: string version + //@@ + //@@ The version of the model to check for readiness. If not + //@@ given the server will choose a version based on the + //@@ model and internal policy. + //@@ + string version = 2; +} + +//@@ +//@@.. cpp:var:: message ModelMetadataResponse +//@@ +//@@ Response message for ModelMetadata. +//@@ +message ModelMetadataResponse +{ + //@@ + //@@ .. cpp:var:: message TensorMetadata + //@@ + //@@ Metadata for a tensor. + //@@ + message TensorMetadata + { + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The tensor name. + //@@ + string name = 1; + + //@@ + //@@ .. cpp:var:: string datatype + //@@ + //@@ The tensor data type. + //@@ + string datatype = 2; + + //@@ + //@@ .. cpp:var:: int64 shape (repeated) + //@@ + //@@ The tensor shape. A variable-size dimension is represented + //@@ by a -1 value. + //@@ + repeated int64 shape = 3; + } + + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The model name. + //@@ + string name = 1; + + //@@ + //@@ .. cpp:var:: string versions (repeated) + //@@ + //@@ The versions of the model. + //@@ + repeated string versions = 2; + + //@@ + //@@ .. cpp:var:: string platform + //@@ + //@@ The model's platform. + //@@ + string platform = 3; + + //@@ + //@@ .. cpp:var:: TensorMetadata inputs (repeated) + //@@ + //@@ The model's inputs. + //@@ + repeated TensorMetadata inputs = 4; + + //@@ + //@@ .. cpp:var:: TensorMetadata outputs (repeated) + //@@ + //@@ The model's outputs. + //@@ + repeated TensorMetadata outputs = 5; +} + +//@@ +//@@.. cpp:var:: message InferParameter +//@@ +//@@ An inference parameter value. +//@@ +message InferParameter +{ + //@@ .. 
cpp:var:: oneof parameter_choice + //@@ + //@@ The parameter value can be a string, an int64, + //@@ an uint64, a double, or a boolean + //@@ + //@@ Note: double and uint64 are currently + //@@ placeholders for future use and + //@@ are not supported for custom parameters + //@@ + oneof parameter_choice + { + //@@ .. cpp:var:: bool bool_param + //@@ + //@@ A boolean parameter value. + //@@ + bool bool_param = 1; + + //@@ .. cpp:var:: int64 int64_param + //@@ + //@@ An int64 parameter value. + //@@ + int64 int64_param = 2; + + //@@ .. cpp:var:: string string_param + //@@ + //@@ A string parameter value. + //@@ + string string_param = 3; + + //@@ .. cpp:var:: double double_param + //@@ + //@@ A double parameter value. + //@@ + double double_param = 4; + + //@@ .. cpp:var:: uint64 uint64_param + //@@ + //@@ A uint64 parameter value. + //@@ + //@@ Not supported for custom parameters + //@@ + uint64 uint64_param = 5; + } +} + +//@@ +//@@.. cpp:var:: message InferTensorContents +//@@ +//@@ The data contained in a tensor represented by the repeated type +//@@ that matches the tensor's data type. Protobuf oneof is not used +//@@ because oneofs cannot contain repeated fields. +//@@ +message InferTensorContents +{ + //@@ + //@@ .. cpp:var:: bool bool_contents (repeated) + //@@ + //@@ Representation for BOOL data type. The size must match what is + //@@ expected by the tensor's shape. The contents must be the flattened, + //@@ one-dimensional, row-major order of the tensor elements. + //@@ + repeated bool bool_contents = 1; + + //@@ + //@@ .. cpp:var:: int32 int_contents (repeated) + //@@ + //@@ Representation for INT8, INT16, and INT32 data types. The size + //@@ must match what is expected by the tensor's shape. The contents + //@@ must be the flattened, one-dimensional, row-major order of the + //@@ tensor elements. + //@@ + repeated int32 int_contents = 2; + + //@@ + //@@ .. cpp:var:: int64 int64_contents (repeated) + //@@ + //@@ Representation for INT64 data types. 
The size must match what + //@@ is expected by the tensor's shape. The contents must be the + //@@ flattened, one-dimensional, row-major order of the tensor elements. + //@@ + repeated int64 int64_contents = 3; + + //@@ + //@@ .. cpp:var:: uint32 uint_contents (repeated) + //@@ + //@@ Representation for UINT8, UINT16, and UINT32 data types. The size + //@@ must match what is expected by the tensor's shape. The contents + //@@ must be the flattened, one-dimensional, row-major order of the + //@@ tensor elements. + //@@ + repeated uint32 uint_contents = 4; + + //@@ + //@@ .. cpp:var:: uint64 uint64_contents (repeated) + //@@ + //@@ Representation for UINT64 data types. The size must match what + //@@ is expected by the tensor's shape. The contents must be the + //@@ flattened, one-dimensional, row-major order of the tensor elements. + //@@ + repeated uint64 uint64_contents = 5; + + //@@ + //@@ .. cpp:var:: float fp32_contents (repeated) + //@@ + //@@ Representation for FP32 data type. The size must match what is + //@@ expected by the tensor's shape. The contents must be the flattened, + //@@ one-dimensional, row-major order of the tensor elements. + //@@ + repeated float fp32_contents = 6; + + //@@ + //@@ .. cpp:var:: double fp64_contents (repeated) + //@@ + //@@ Representation for FP64 data type. The size must match what is + //@@ expected by the tensor's shape. The contents must be the flattened, + //@@ one-dimensional, row-major order of the tensor elements. + //@@ + repeated double fp64_contents = 7; + + //@@ + //@@ .. cpp:var:: bytes bytes_contents (repeated) + //@@ + //@@ Representation for BYTES data type. The size must match what is + //@@ expected by the tensor's shape. The contents must be the flattened, + //@@ one-dimensional, row-major order of the tensor elements. + //@@ + repeated bytes bytes_contents = 8; +} + +//@@ +//@@.. cpp:var:: message ModelInferRequest +//@@ +//@@ Request message for ModelInfer. 
+//@@ +message ModelInferRequest +{ + //@@ + //@@ .. cpp:var:: message InferInputTensor + //@@ + //@@ An input tensor for an inference request. + //@@ + message InferInputTensor + { + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The tensor name. + //@@ + string name = 1; + + //@@ + //@@ .. cpp:var:: string datatype + //@@ + //@@ The tensor data type. + //@@ + string datatype = 2; + + //@@ + //@@ .. cpp:var:: int64 shape (repeated) + //@@ + //@@ The tensor shape. + //@@ + repeated int64 shape = 3; + + //@@ .. cpp:var:: map parameters + //@@ + //@@ Optional inference input tensor parameters. + //@@ + map parameters = 4; + + //@@ .. cpp:var:: InferTensorContents contents + //@@ + //@@ The tensor contents using a data-type format. This field + //@@ must not be specified if tensor contents are being specified + //@@ in ModelInferRequest.raw_input_contents. + //@@ + InferTensorContents contents = 5; + } + + //@@ + //@@ .. cpp:var:: message InferRequestedOutputTensor + //@@ + //@@ An output tensor requested for an inference request. + //@@ + message InferRequestedOutputTensor + { + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The tensor name. + //@@ + string name = 1; + + //@@ .. cpp:var:: map parameters + //@@ + //@@ Optional requested output tensor parameters. + //@@ + map parameters = 2; + } + + //@@ .. cpp:var:: string model_name + //@@ + //@@ The name of the model to use for inferencing. + //@@ + string model_name = 1; + + //@@ .. cpp:var:: string model_version + //@@ + //@@ The version of the model to use for inference. If not + //@@ given the latest/most-recent version of the model is used. + //@@ + string model_version = 2; + + //@@ .. cpp:var:: string id + //@@ + //@@ Optional identifier for the request. If specified will be + //@@ returned in the response. + //@@ + string id = 3; + + //@@ .. cpp:var:: map parameters + //@@ + //@@ Optional inference parameters. + //@@ + map parameters = 4; + + //@@ + //@@ .. 
cpp:var:: InferInputTensor inputs (repeated) + //@@ + //@@ The input tensors for the inference. + //@@ + repeated InferInputTensor inputs = 5; + + //@@ + //@@ .. cpp:var:: InferRequestedOutputTensor outputs (repeated) + //@@ + //@@ The requested output tensors for the inference. Optional, if not + //@@ specified all outputs specified in the model config will be + //@@ returned. + //@@ + repeated InferRequestedOutputTensor outputs = 6; + + //@@ + //@@ .. cpp:var:: bytes raw_input_contents + //@@ + //@@ The data contained in an input tensor can be represented in + //@@ "raw" bytes form or in the repeated type that matches the + //@@ tensor's data type. Using the "raw" bytes form will + //@@ typically allow higher performance due to the way protobuf + //@@ allocation and reuse interacts with GRPC. For example, see + //@@ https://github.com/grpc/grpc/issues/23231. + //@@ + //@@ To use the raw representation 'raw_input_contents' must be + //@@ initialized with data for each tensor in the same order as + //@@ 'inputs'. For each tensor, the size of this content must + //@@ match what is expected by the tensor's shape and data + //@@ type. The raw data must be the flattened, one-dimensional, + //@@ row-major order of the tensor elements without any stride + //@@ or padding between the elements. Note that the FP16 and BF16 data + //@@ types must be represented as raw content as there is no + //@@ specific data type for a 16-bit float type. + //@@ + //@@ If this field is specified then InferInputTensor::contents + //@@ must not be specified for any input tensor. + //@@ + repeated bytes raw_input_contents = 7; +} + +//@@ +//@@.. cpp:var:: message ModelInferResponse +//@@ +//@@ Response message for ModelInfer. +//@@ +message ModelInferResponse +{ + //@@ + //@@ .. cpp:var:: message InferOutputTensor + //@@ + //@@ An output tensor returned for an inference request. + //@@ + message InferOutputTensor + { + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The tensor name. 
+ //@@ + string name = 1; + + //@@ + //@@ .. cpp:var:: string datatype + //@@ + //@@ The tensor data type. + //@@ + string datatype = 2; + + //@@ + //@@ .. cpp:var:: int64 shape (repeated) + //@@ + //@@ The tensor shape. + //@@ + repeated int64 shape = 3; + + //@@ .. cpp:var:: map parameters + //@@ + //@@ Optional output tensor parameters. + //@@ + map parameters = 4; + + //@@ .. cpp:var:: InferTensorContents contents + //@@ + //@@ The tensor contents using a data-type format. This field + //@@ must not be specified if tensor contents are being specified + //@@ in ModelInferResponse.raw_output_contents. + //@@ + InferTensorContents contents = 5; + } + + //@@ .. cpp:var:: string model_name + //@@ + //@@ The name of the model used for inference. + //@@ + string model_name = 1; + + //@@ .. cpp:var:: string model_version + //@@ + //@@ The version of the model used for inference. + //@@ + string model_version = 2; + + //@@ .. cpp:var:: string id + //@@ + //@@ The id of the inference request if one was specified. + //@@ + string id = 3; + + //@@ .. cpp:var:: map parameters + //@@ + //@@ Optional inference response parameters. + //@@ + map parameters = 4; + + //@@ + //@@ .. cpp:var:: InferOutputTensor outputs (repeated) + //@@ + //@@ The output tensors holding inference results. + //@@ + repeated InferOutputTensor outputs = 5; + + //@@ + //@@ .. cpp:var:: bytes raw_output_contents + //@@ + //@@ The data contained in an output tensor can be represented in + //@@ "raw" bytes form or in the repeated type that matches the + //@@ tensor's data type. Using the "raw" bytes form will + //@@ typically allow higher performance due to the way protobuf + //@@ allocation and reuse interacts with GRPC. For example, see + //@@ https://github.com/grpc/grpc/issues/23231. + //@@ + //@@ To use the raw representation 'raw_output_contents' must be + //@@ initialized with data for each tensor in the same order as + //@@ 'outputs'. 
For each tensor, the size of this content must + //@@ match what is expected by the tensor's shape and data + //@@ type. The raw data must be the flattened, one-dimensional, + //@@ row-major order of the tensor elements without any stride + //@@ or padding between the elements. Note that the FP16 and BF16 data + //@@ types must be represented as raw content as there is no + //@@ specific data type for a 16-bit float type. + //@@ + //@@ If this field is specified then InferOutputTensor::contents + //@@ must not be specified for any output tensor. + //@@ + repeated bytes raw_output_contents = 6; +} + +//@@ +//@@.. cpp:var:: message ModelStreamInferResponse +//@@ +//@@ Response message for ModelStreamInfer. +//@@ +message ModelStreamInferResponse +{ + //@@ + //@@ .. cpp:var:: string error_message + //@@ + //@@ The message describing the error. The empty message + //@@ indicates the inference was successful without errors. + //@@ + string error_message = 1; + + //@@ + //@@ .. cpp:var:: ModelInferResponse infer_response + //@@ + //@@ Holds the results of the request. + //@@ + ModelInferResponse infer_response = 2; +} + +//@@ +//@@.. cpp:var:: message ModelConfigRequest +//@@ +//@@ Request message for ModelConfig. +//@@ +message ModelConfigRequest +{ + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the model. + //@@ + string name = 1; + + //@@ .. cpp:var:: string version + //@@ + //@@ The version of the model. If not given the model version + //@@ is selected automatically based on the version policy. + //@@ + string version = 2; +} + +//@@ +//@@.. cpp:var:: message ModelConfigResponse +//@@ +//@@ Response message for ModelConfig. +//@@ +message ModelConfigResponse +{ + //@@ + //@@ .. cpp:var:: ModelConfig config + //@@ + //@@ The model configuration. + //@@ + ModelConfig config = 1; +} + +//@@ +//@@.. cpp:var:: message ModelStatisticsRequest +//@@ +//@@ Request message for ModelStatistics. +//@@ +message ModelStatisticsRequest +{ + //@@ .. 
cpp:var:: string name + //@@ + //@@ The name of the model. If not given returns statistics for + //@@ all models. + //@@ + string name = 1; + + //@@ .. cpp:var:: string version + //@@ + //@@ The version of the model. If not given returns statistics for + //@@ all model versions. + //@@ + string version = 2; +} + + +//@@ +//@@.. cpp:var:: message StatisticDuration +//@@ +//@@ Statistic recording a cumulative duration metric. +//@@ +message StatisticDuration +{ + //@@ .. cpp:var:: uint64 count + //@@ + //@@ Cumulative number of times this metric occurred. + //@@ + uint64 count = 1; + + //@@ .. cpp:var:: uint64 ns + //@@ + //@@ Total collected duration of this metric in nanoseconds. + //@@ + uint64 ns = 2; +} + +//@@ +//@@.. cpp:var:: message InferStatistics +//@@ +//@@ Inference statistics. +//@@ +message InferStatistics +{ + //@@ .. cpp:var:: StatisticDuration success + //@@ + //@@ Cumulative count and duration for successful inference + //@@ request. The "success" count and cumulative duration includes + //@@ cache hits. + //@@ + StatisticDuration success = 1; + + //@@ .. cpp:var:: StatisticDuration fail + //@@ + //@@ Cumulative count and duration for failed inference + //@@ request. + //@@ + StatisticDuration fail = 2; + + //@@ .. cpp:var:: StatisticDuration queue + //@@ + //@@ The count and cumulative duration that inference requests wait in + //@@ scheduling or other queues. The "queue" count and cumulative + //@@ duration includes cache hits. + //@@ + StatisticDuration queue = 3; + + //@@ .. cpp:var:: StatisticDuration compute_input + //@@ + //@@ The count and cumulative duration to prepare input tensor data as + //@@ required by the model framework / backend. For example, this duration + //@@ should include the time to copy input tensor data to the GPU. + //@@ The "compute_input" count and cumulative duration do not account for + //@@ requests that were a cache hit. See the "cache_hit" field for more + //@@ info. 
+ //@@ + StatisticDuration compute_input = 4; + + //@@ .. cpp:var:: StatisticDuration compute_infer + //@@ + //@@ The count and cumulative duration to execute the model. + //@@ The "compute_infer" count and cumulative duration do not account for + //@@ requests that were a cache hit. See the "cache_hit" field for more + //@@ info. + //@@ + StatisticDuration compute_infer = 5; + + //@@ .. cpp:var:: StatisticDuration compute_output + //@@ + //@@ The count and cumulative duration to extract output tensor data + //@@ produced by the model framework / backend. For example, this duration + //@@ should include the time to copy output tensor data from the GPU. + //@@ The "compute_output" count and cumulative duration do not account for + //@@ requests that were a cache hit. See the "cache_hit" field for more + //@@ info. + //@@ + StatisticDuration compute_output = 6; + + //@@ .. cpp:var:: StatisticDuration cache_hit + //@@ + //@@ The count of response cache hits and cumulative duration to lookup + //@@ and extract output tensor data from the Response Cache on a cache + //@@ hit. For example, this duration should include the time to copy + //@@ output tensor data from the Response Cache to the response object. + //@@ On cache hits, triton does not need to go to the model/backend + //@@ for the output tensor data, so the "compute_input", "compute_infer", + //@@ and "compute_output" fields are not updated. Assuming the response + //@@ cache is enabled for a given model, a cache hit occurs for a + //@@ request to that model when the request metadata (model name, + //@@ model version, model inputs) hashes to an existing entry in the + //@@ cache. On a cache miss, the request hash and response output tensor + //@@ data is added to the cache. See response cache docs for more info: + //@@ + // https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md + //@@ + StatisticDuration cache_hit = 7; + + //@@ .. 
cpp:var:: StatisticDuration cache_miss + //@@ + //@@ The count of response cache misses and cumulative duration to lookup + //@@ and insert output tensor data from the computed response to the + // cache. + //@@ For example, this duration should include the time to copy + //@@ output tensor data from the response object to the Response Cache. + //@@ Assuming the response cache is enabled for a given model, a cache + //@@ miss occurs for a request to that model when the request metadata + //@@ does NOT hash to an existing entry in the cache. See the response + //@@ cache docs for more info: + //@@ + // https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md + //@@ + StatisticDuration cache_miss = 8; +} + +//@@ +//@@.. cpp:var:: message InferResponseStatistics +//@@ +//@@ Statistics per response. +//@@ +message InferResponseStatistics +{ + //@@ .. cpp:var:: StatisticDuration compute_infer + //@@ + //@@ The count and cumulative duration to compute a response. + //@@ + StatisticDuration compute_infer = 1; + + //@@ .. cpp:var:: StatisticDuration compute_output + //@@ + //@@ The count and cumulative duration to extract the output tensors of a + //@@ response. + //@@ + StatisticDuration compute_output = 2; + + //@@ .. cpp:var:: StatisticDuration success + //@@ + //@@ The count and cumulative duration for successful responses. + //@@ + StatisticDuration success = 3; + + //@@ .. cpp:var:: StatisticDuration fail + //@@ + //@@ The count and cumulative duration for failed responses. + //@@ + StatisticDuration fail = 4; + + //@@ .. cpp:var:: StatisticDuration empty_response + //@@ + //@@ The count and cumulative duration for empty responses. + //@@ + StatisticDuration empty_response = 5; + + //@@ .. cpp:var:: StatisticDuration cancel + //@@ + //@@ The count and cumulative duration, for cleaning up resources held by + //@@ a cancelled request, for cancelled responses. + //@@ + StatisticDuration cancel = 6; +} + +//@@ +//@@.. 
cpp:var:: message InferBatchStatistics +//@@ +//@@ Inference batch statistics. +//@@ +message InferBatchStatistics +{ + //@@ .. cpp:var:: uint64 batch_size + //@@ + //@@ The size of the batch. + //@@ + uint64 batch_size = 1; + + //@@ .. cpp:var:: StatisticDuration compute_input + //@@ + //@@ The count and cumulative duration to prepare input tensor data as + //@@ required by the model framework / backend with the given batch size. + //@@ For example, this duration should include the time to copy input + //@@ tensor data to the GPU. + //@@ + StatisticDuration compute_input = 2; + + //@@ .. cpp:var:: StatisticDuration compute_infer + //@@ + //@@ The count and cumulative duration to execute the model with the given + //@@ batch size. + //@@ + StatisticDuration compute_infer = 3; + + //@@ .. cpp:var:: StatisticDuration compute_output + //@@ + //@@ The count and cumulative duration to extract output tensor data + //@@ produced by the model framework / backend with the given batch size. + //@@ For example, this duration should include the time to copy output + //@@ tensor data from the GPU. + //@@ + StatisticDuration compute_output = 4; +} + +//@@ +//@@.. cpp:var:: message MemoryUsage +//@@ +//@@ Memory usage. +//@@ +message MemoryUsage +{ + //@@ .. cpp:var:: string type + //@@ + //@@ The type of memory, the value can be "CPU", "CPU_PINNED", "GPU". + //@@ + string type = 1; + + //@@ .. cpp:var:: int64 id + //@@ + //@@ The id of the memory, typically used with "type" to identify + //@@ a device that hosts the memory. + //@@ + int64 id = 2; + + //@@ .. cpp:var:: uint64 byte_size + //@@ + //@@ The byte size of the memory. + //@@ + uint64 byte_size = 3; +} + +//@@ +//@@.. cpp:var:: message ModelStatistics +//@@ +//@@ Statistics for a specific model and version. +//@@ +message ModelStatistics +{ + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the model. If not given returns statistics for all + //@@ models. + //@@ + string name = 1; + + //@@ .. 
cpp:var:: string version + //@@ + //@@ The version of the model. + //@@ + string version = 2; + + //@@ .. cpp:var:: uint64 last_inference + //@@ + //@@ The timestamp of the last inference request made for this model, + //@@ as milliseconds since the epoch. + //@@ + uint64 last_inference = 3; + + //@@ .. cpp:var:: uint64 inference_count + //@@ + //@@ The cumulative count of successful inference requests made for this + //@@ model. Each inference in a batched request is counted as an + //@@ individual inference. For example, if a client sends a single + //@@ inference request with batch size 64, "inference_count" will be + //@@ incremented by 64. Similarly, if a client sends 64 individual + //@@ requests each with batch size 1, "inference_count" will be + //@@ incremented by 64. The "inference_count" value DOES NOT include + //@@ cache hits. + //@@ + uint64 inference_count = 4; + + //@@ .. cpp:var:: uint64 execution_count + //@@ + //@@ The cumulative count of the number of successful inference executions + //@@ performed for the model. When dynamic batching is enabled, a single + //@@ model execution can perform inferencing for more than one inference + //@@ request. For example, if a client sends 64 individual requests each + //@@ with batch size 1 and the dynamic batcher batches them into a single + //@@ large batch for model execution then "execution_count" will be + //@@ incremented by 1. If, on the other hand, the dynamic batcher is not + //@@ enabled for that model, each of the 64 individual requests is executed + //@@ independently, and "execution_count" will be incremented by 64. + //@@ The "execution_count" value DOES NOT include cache hits. + //@@ + uint64 execution_count = 5; + + //@@ .. cpp:var:: InferStatistics inference_stats + //@@ + //@@ The aggregate statistics for the model/version. + //@@ + InferStatistics inference_stats = 6; + + //@@ .. 
cpp:var:: InferBatchStatistics batch_stats (repeated) + //@@ + //@@ The aggregate statistics for each different batch size that is + //@@ executed in the model. The batch statistics indicate how many actual + //@@ model executions were performed and show differences due to different + //@@ batch size (for example, larger batches typically take longer to + //@@ compute). + //@@ + repeated InferBatchStatistics batch_stats = 7; + + //@@ .. cpp:var:: MemoryUsage memory_usage (repeated) + //@@ + //@@ The memory usage detected during model loading, which may be used to + //@@ estimate the memory to be released once the model is unloaded. Note + //@@ that the estimation is inferenced by the profiling tools and + //@@ framework's memory schema, therefore it is advised to perform + //@@ experiments to understand the scenario that the reported memory usage + //@@ can be relied on. As a starting point, the GPU memory usage for + //@@ models in ONNX Runtime backend and TensorRT backend is usually + //@@ aligned. + //@@ + repeated MemoryUsage memory_usage = 8; + + //@@ .. cpp:var:: map response_stats + //@@ + //@@ The key and value pairs for all responses statistics. The key is a + //@@ string identifying a set of response statistics aggregated together + //@@ (i.e. index of the response sent). The value is the aggregated + //@@ response statistics. + //@@ + map response_stats = 9; +} + +//@@ +//@@.. cpp:var:: message ModelStatisticsResponse +//@@ +//@@ Response message for ModelStatistics. +//@@ +message ModelStatisticsResponse +{ + //@@ .. cpp:var:: ModelStatistics model_stats (repeated) + //@@ + //@@ Statistics for each requested model. + //@@ + repeated ModelStatistics model_stats = 1; +} + +//@@ +//@@.. cpp:var:: message ModelRepositoryParameter +//@@ +//@@ An model repository parameter value. +//@@ +message ModelRepositoryParameter +{ + //@@ .. 
cpp:var:: oneof parameter_choice + //@@ + //@@ The parameter value can be a string, an int64 or + //@@ a boolean + //@@ + oneof parameter_choice + { + //@@ .. cpp:var:: bool bool_param + //@@ + //@@ A boolean parameter value. + //@@ + bool bool_param = 1; + + //@@ .. cpp:var:: int64 int64_param + //@@ + //@@ An int64 parameter value. + //@@ + int64 int64_param = 2; + + //@@ .. cpp:var:: string string_param + //@@ + //@@ A string parameter value. + //@@ + string string_param = 3; + + //@@ .. cpp:var:: bytes bytes_param + //@@ + //@@ A bytes parameter value. + //@@ + bytes bytes_param = 4; + } +} + +//@@ +//@@.. cpp:var:: message RepositoryIndexRequest +//@@ +//@@ Request message for RepositoryIndex. +//@@ +message RepositoryIndexRequest +{ + //@@ .. cpp:var:: string repository_name + //@@ + //@@ The name of the repository. If empty the index is returned + //@@ for all repositories. + //@@ + string repository_name = 1; + + //@@ .. cpp:var:: bool ready + //@@ + //@@ If true returned only models currently ready for inferencing. + //@@ + bool ready = 2; +} + +//@@ +//@@.. cpp:var:: message RepositoryIndexResponse +//@@ +//@@ Response message for RepositoryIndex. +//@@ +message RepositoryIndexResponse +{ + //@@ + //@@ .. cpp:var:: message ModelIndex + //@@ + //@@ Index entry for a model. + //@@ + message ModelIndex + { + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the model. + //@@ + string name = 1; + + //@@ .. cpp:var:: string version + //@@ + //@@ The version of the model. + //@@ + string version = 2; + + //@@ + //@@ .. cpp:var:: string state + //@@ + //@@ The state of the model. + //@@ + string state = 3; + + //@@ + //@@ .. cpp:var:: string reason + //@@ + //@@ The reason, if any, that the model is in the given state. + //@@ + string reason = 4; + } + + //@@ + //@@ .. cpp:var:: ModelIndex models (repeated) + //@@ + //@@ An index entry for each model. + //@@ + repeated ModelIndex models = 1; +} + +//@@ +//@@.. 
cpp:var:: message RepositoryModelLoadRequest +//@@ +//@@ Request message for RepositoryModelLoad. +//@@ +message RepositoryModelLoadRequest +{ + //@@ .. cpp:var:: string repository_name + //@@ + //@@ The name of the repository to load from. If empty the model + //@@ is loaded from any repository. + //@@ + string repository_name = 1; + + //@@ .. cpp:var:: string model_name + //@@ + //@@ The name of the model to load, or reload. + //@@ + string model_name = 2; + + //@@ .. cpp:var:: map<string,ModelRepositoryParameter> parameters + //@@ + //@@ Optional model repository request parameters. + //@@ + map<string, ModelRepositoryParameter> parameters = 3; +} + +//@@ +//@@.. cpp:var:: message RepositoryModelLoadResponse +//@@ +//@@ Response message for RepositoryModelLoad. +//@@ +message RepositoryModelLoadResponse {} + +//@@ +//@@.. cpp:var:: message RepositoryModelUnloadRequest +//@@ +//@@ Request message for RepositoryModelUnload. +//@@ +message RepositoryModelUnloadRequest +{ + //@@ .. cpp:var:: string repository_name + //@@ + //@@ The name of the repository from which the model was originally + //@@ loaded. If empty the repository is not considered. + //@@ + string repository_name = 1; + + //@@ .. cpp:var:: string model_name + //@@ + //@@ The name of the model to unload. + //@@ + string model_name = 2; + + //@@ .. cpp:var:: map<string,ModelRepositoryParameter> parameters + //@@ + //@@ Optional model repository request parameters. + //@@ + map<string, ModelRepositoryParameter> parameters = 3; +} + +//@@ +//@@.. cpp:var:: message RepositoryModelUnloadResponse +//@@ +//@@ Response message for RepositoryModelUnload. +//@@ +message RepositoryModelUnloadResponse {} + +//@@ +//@@.. cpp:var:: message SystemSharedMemoryStatusRequest +//@@ +//@@ Request message for SystemSharedMemoryStatus. +//@@ +message SystemSharedMemoryStatusRequest +{ + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the region to get status for. If empty the + //@@ status is returned for all registered regions. + //@@ + string name = 1; +} + +//@@ +//@@.. 
cpp:var:: message SystemSharedMemoryStatusResponse +//@@ +//@@ Response message for SystemSharedMemoryStatus. +//@@ +message SystemSharedMemoryStatusResponse +{ + //@@ + //@@ .. cpp:var:: message RegionStatus + //@@ + //@@ Status for a shared memory region. + //@@ + message RegionStatus + { + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The name for the shared memory region. + //@@ + string name = 1; + + //@@ .. cpp:var:: string shared_memory_key + //@@ + //@@ The key of the underlying memory object that contains the + //@@ shared memory region. + //@@ + string key = 2; + + //@@ .. cpp:var:: uint64 offset + //@@ + //@@ Offset, in bytes, within the underlying memory object to + //@@ the start of the shared memory region. + //@@ + uint64 offset = 3; + + //@@ .. cpp:var:: uint64 byte_size + //@@ + //@@ Size of the shared memory region, in bytes. + //@@ + uint64 byte_size = 4; + } + + //@@ + //@@ .. cpp:var:: map regions + //@@ + //@@ Status for each of the registered regions, indexed by + //@@ region name. + //@@ + map regions = 1; +} + +//@@ +//@@.. cpp:var:: message SystemSharedMemoryRegisterRequest +//@@ +//@@ Request message for SystemSharedMemoryRegister. +//@@ +message SystemSharedMemoryRegisterRequest +{ + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the region to register. + //@@ + string name = 1; + + //@@ .. cpp:var:: string shared_memory_key + //@@ + //@@ The key of the underlying memory object that contains the + //@@ shared memory region. + //@@ + string key = 2; + + //@@ .. cpp:var:: uint64 offset + //@@ + //@@ Offset, in bytes, within the underlying memory object to + //@@ the start of the shared memory region. + //@@ + uint64 offset = 3; + + //@@ .. cpp:var:: uint64 byte_size + //@@ + //@@ Size of the shared memory region, in bytes. + //@@ + uint64 byte_size = 4; +} + +//@@ +//@@.. cpp:var:: message SystemSharedMemoryRegisterResponse +//@@ +//@@ Response message for SystemSharedMemoryRegister. 
+//@@ +message SystemSharedMemoryRegisterResponse {} + +//@@ +//@@.. cpp:var:: message SystemSharedMemoryUnregisterRequest +//@@ +//@@ Request message for SystemSharedMemoryUnregister. +//@@ +message SystemSharedMemoryUnregisterRequest +{ + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the system region to unregister. If empty + //@@ all system shared-memory regions are unregistered. + //@@ + string name = 1; +} + +//@@ +//@@.. cpp:var:: message SystemSharedMemoryUnregisterResponse +//@@ +//@@ Response message for SystemSharedMemoryUnregister. +//@@ +message SystemSharedMemoryUnregisterResponse {} + +//@@ +//@@.. cpp:var:: message CudaSharedMemoryStatusRequest +//@@ +//@@ Request message for CudaSharedMemoryStatus. +//@@ +message CudaSharedMemoryStatusRequest +{ + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the region to get status for. If empty the + //@@ status is returned for all registered regions. + //@@ + string name = 1; +} + +//@@ +//@@.. cpp:var:: message CudaSharedMemoryStatusResponse +//@@ +//@@ Response message for CudaSharedMemoryStatus. +//@@ +message CudaSharedMemoryStatusResponse +{ + //@@ + //@@ .. cpp:var:: message RegionStatus + //@@ + //@@ Status for a shared memory region. + //@@ + message RegionStatus + { + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The name for the shared memory region. + //@@ + string name = 1; + + //@@ .. cpp:var:: uint64 device_id + //@@ + //@@ The GPU device ID where the cudaIPC handle was created. + //@@ + uint64 device_id = 2; + + //@@ .. cpp:var:: uint64 byte_size + //@@ + //@@ Size of the shared memory region, in bytes. + //@@ + uint64 byte_size = 3; + } + + //@@ + //@@ .. cpp:var:: map<string,RegionStatus> regions + //@@ + //@@ Status for each of the registered regions, indexed by + //@@ region name. + //@@ + map<string, RegionStatus> regions = 1; +} + +//@@ +//@@.. cpp:var:: message CudaSharedMemoryRegisterRequest +//@@ +//@@ Request message for CudaSharedMemoryRegister. 
+//@@ +message CudaSharedMemoryRegisterRequest +{ + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the region to register. + //@@ + string name = 1; + + //@@ .. cpp:var:: bytes raw_handle + //@@ + //@@ The raw serialized cudaIPC handle. + //@@ + bytes raw_handle = 2; + + //@@ .. cpp:var:: int64 device_id + //@@ + //@@ The GPU device ID on which the cudaIPC handle was created. + //@@ + int64 device_id = 3; + + //@@ .. cpp:var:: uint64 byte_size + //@@ + //@@ Size of the shared memory block, in bytes. + //@@ + uint64 byte_size = 4; +} + +//@@ +//@@.. cpp:var:: message CudaSharedMemoryRegisterResponse +//@@ +//@@ Response message for CudaSharedMemoryRegister. +//@@ +message CudaSharedMemoryRegisterResponse {} + +//@@ +//@@.. cpp:var:: message CudaSharedMemoryUnregisterRequest +//@@ +//@@ Request message for CudaSharedMemoryUnregister. +//@@ +message CudaSharedMemoryUnregisterRequest +{ + //@@ + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the cuda region to unregister. If empty + //@@ all cuda shared-memory regions are unregistered. + //@@ + string name = 1; +} + +//@@ +//@@.. cpp:var:: message CudaSharedMemoryUnregisterResponse +//@@ +//@@ Response message for CudaSharedMemoryUnregister. +//@@ +message CudaSharedMemoryUnregisterResponse {} + +//@@ +//@@.. cpp:var:: message TraceSettingRequest +//@@ +//@@ Request message for TraceSetting. +//@@ +message TraceSettingRequest +{ + //@@ + //@@ .. cpp:var:: message SettingValue + //@@ + //@@ The values to be associated with a trace setting. + //@@ If no value is provided, the setting will be clear and + //@@ the global setting value will be used. + //@@ + message SettingValue + { + //@@ + //@@ .. cpp:var:: string value (repeated) + //@@ + //@@ The value. + //@@ + repeated string value = 1; + } + + //@@ .. cpp:var:: map settings + //@@ + //@@ The new setting values to be updated, + //@@ settings that are not specified will remain unchanged. + //@@ + map settings = 1; + + //@@ + //@@ .. 
cpp:var:: string model_name + //@@ + //@@ The name of the model to apply the new trace settings. + //@@ If not given, the new settings will be applied globally. + //@@ + string model_name = 2; +} + +//@@ +//@@.. cpp:var:: message TraceSettingResponse +//@@ +//@@ Response message for TraceSetting. +//@@ +message TraceSettingResponse +{ + //@@ + //@@ .. cpp:var:: message SettingValue + //@@ + //@@ The values to be associated with a trace setting. + //@@ + message SettingValue + { + //@@ + //@@ .. cpp:var:: string value (repeated) + //@@ + //@@ The value. + //@@ + repeated string value = 1; + } + + //@@ .. cpp:var:: map settings + //@@ + //@@ The current trace settings, including any changes specified + //@@ by TraceSettingRequest. + //@@ + map settings = 1; +} + +//@@ +//@@.. cpp:var:: message LogSettingsRequest +//@@ +//@@ Request message for LogSettings. +//@@ +message LogSettingsRequest +{ + message SettingValue + { + oneof parameter_choice + { + //@@ .. cpp:var:: bool bool_param + //@@ + //@@ A boolean parameter value. + //@@ + bool bool_param = 1; + + //@@ .. cpp:var:: uint32 uint32_param + //@@ + //@@ An uint32 parameter value. + //@@ + uint32 uint32_param = 2; + + //@@ .. cpp:var:: string string_param + //@@ + //@@ A string parameter value. + //@@ + string string_param = 3; + } + } + //@@ .. cpp:var:: map settings + //@@ + //@@ The current log settings. + //@@ + map settings = 1; +} + +//@@ +//@@.. cpp:var:: message LogSettingsResponse +//@@ +//@@ Response message for LogSettings. +//@@ +message LogSettingsResponse +{ + message SettingValue + { + oneof parameter_choice + { + //@@ .. cpp:var:: bool bool_param + //@@ + //@@ A boolean parameter value. + //@@ + bool bool_param = 1; + + //@@ .. cpp:var:: uint32 uint32_param + //@@ + //@@ An int32 parameter value. + //@@ + uint32 uint32_param = 2; + + //@@ .. cpp:var:: string string_param + //@@ + //@@ A string parameter value. + //@@ + string string_param = 3; + } + } + //@@ .. 
cpp:var:: map<string,SettingValue> settings + //@@ + //@@ The current log settings. + //@@ + map<string, SettingValue> settings = 1; +}