|
 1 |    | -// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
   |  1 | +// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 |  2 | //
 3 |  3 | // Redistribution and use in source and binary forms, with or without
 4 |  4 | // modification, are permitted provided that the following conditions
|
@@ -65,6 +65,7 @@ struct TRITONBACKEND_Response;
65 | 65 | struct TRITONBACKEND_Backend;
66 | 66 | struct TRITONBACKEND_Model;
67 | 67 | struct TRITONBACKEND_ModelInstance;
   | 68 | +struct TRITONBACKEND_ModelInstanceResponseStatistics;
68 | 69 | struct TRITONBACKEND_BackendAttribute;
69 | 70 | struct TRITONBACKEND_Batcher;
70 | 71 |
|
@@ -94,7 +95,7 @@ struct TRITONBACKEND_Batcher;
 94 |  95 | /// }
 95 |  96 | ///
 96 |  97 | #define TRITONBACKEND_API_VERSION_MAJOR 1
 97 |     | -#define TRITONBACKEND_API_VERSION_MINOR 18
    |  98 | +#define TRITONBACKEND_API_VERSION_MINOR 19
 98 |  99 |
 99 | 100 | /// Get the TRITONBACKEND API version supported by Triton. This value
100 | 101 | /// can be compared against the TRITONBACKEND_API_VERSION_MAJOR and
|
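The minor-version bump is visible to backends at load time. A minimal sketch of
the usual compatibility check in TRITONBACKEND_Initialize, assuming the backend
is compiled against these headers (the error-message text is illustrative):

    #include "triton/core/tritonbackend.h"

    // Sketch: reject a server whose backend API is incompatible. The major
    // versions must match exactly; the server's minor version must be at
    // least the one this backend was compiled against.
    extern "C" TRITONSERVER_Error*
    TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
    {
      uint32_t api_version_major, api_version_minor;
      TRITONSERVER_Error* err =
          TRITONBACKEND_ApiVersion(&api_version_major, &api_version_minor);
      if (err != nullptr) {
        return err;
      }
      if ((api_version_major != TRITONBACKEND_API_VERSION_MAJOR) ||
          (api_version_minor < TRITONBACKEND_API_VERSION_MINOR)) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_UNSUPPORTED,
            "triton backend API version does not support this backend");
      }
      return nullptr;  // success
    }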
@@ -761,8 +762,9 @@ TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseOutput(
761 | 762 | /// \param send_flags Flags associated with the response. \see
762 | 763 | /// TRITONSERVER_ResponseCompleteFlag. \see
763 | 764 | /// TRITONSERVER_InferenceResponseCompleteFn_t.
764 |     | -/// \param error The TRITONSERVER_Error to send if the response is an
765 |     | -/// error, or nullptr if the response is successful.
    | 765 | +/// \param error The TRITONSERVER_Error to send if the response is an error, or
    | 766 | +/// nullptr if the response is successful. The caller retains ownership of the
    | 767 | +/// error object and must free it with TRITONSERVER_ErrorDelete.
766 | 768 | /// \return a TRITONSERVER_Error indicating success or failure.
767 | 769 | TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseSend(
768 | 770 |     TRITONBACKEND_Response* response, const uint32_t send_flags,
|
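Under this clarified contract, a backend that sends an error response must
delete the error itself. A minimal sketch, assuming `response` is the
TRITONBACKEND_Response for the in-flight request:

    // Sketch: send a final error response; TRITONBACKEND_ResponseSend does
    // not take ownership of `error`, so the backend frees it afterwards.
    TRITONSERVER_Error* error =
        TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, "inference failed");
    TRITONSERVER_Error* send_err = TRITONBACKEND_ResponseSend(
        response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, error);
    TRITONSERVER_ErrorDelete(error);  // caller retains ownership
    if (send_err != nullptr) {
      // The returned error is likewise owned by the backend; log or
      // propagate it, then free it.
      TRITONSERVER_ErrorDelete(send_err);
    }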
@@ -1319,16 +1321,16 @@ TRITONBACKEND_ModelInstanceReportMemoryUsage(
1319 | 1321 | /// TRITONBACKEND_ModelInstanceExecute.
1320 | 1322 | ///
1321 | 1323 | ///   TRITONBACKEND_ModelInstanceExecute()
1322 |      | -///     CAPTURE TIMESPACE (exec_start_ns)
     | 1324 | +///     CAPTURE TIMESTAMP (exec_start_ns)
1323 | 1325 | ///     < process input tensors to prepare them for inference
1324 | 1326 | ///       execution, including copying the tensors to/from GPU if
1325 | 1327 | ///       necessary >
1326 |      | -///     CAPTURE TIMESPACE (compute_start_ns)
     | 1328 | +///     CAPTURE TIMESTAMP (compute_start_ns)
1327 | 1329 | ///     < perform inference computations to produce outputs >
1328 |      | -///     CAPTURE TIMESPACE (compute_end_ns)
     | 1330 | +///     CAPTURE TIMESTAMP (compute_end_ns)
1329 | 1331 | ///     < allocate output buffers and extract output tensors, including
1330 | 1332 | ///       copying the tensors to/from GPU if necessary >
1331 |      | -///     CAPTURE TIMESPACE (exec_end_ns)
     | 1333 | +///     CAPTURE TIMESTAMP (exec_end_ns)
1332 | 1334 | ///   return
1333 | 1335 | ///
1334 | 1336 | /// Note that these statistics are associated with a valid
|
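In practice the four capture points map directly onto std::chrono calls, as the
comment prescribes. A sketch of the per-request reporting pattern, assuming
`instance` and `request` come from the surrounding
TRITONBACKEND_ModelInstanceExecute and the actual inference steps are elided:

    #include <chrono>
    #include <cstdint>

    // Sketch: nanosecond timestamps from the steady clock, matching the
    // convention used for all reported statistics.
    static uint64_t
    NowNs()
    {
      return std::chrono::duration_cast<std::chrono::nanoseconds>(
                 std::chrono::steady_clock::now().time_since_epoch())
          .count();
    }

    // Inside TRITONBACKEND_ModelInstanceExecute:
    const uint64_t exec_start_ns = NowNs();
    // ... prepare input tensors (including any GPU copies) ...
    const uint64_t compute_start_ns = NowNs();
    // ... run inference ...
    const uint64_t compute_end_ns = NowNs();
    // ... allocate output buffers and extract output tensors ...
    const uint64_t exec_end_ns = NowNs();
    TRITONSERVER_Error* err = TRITONBACKEND_ModelInstanceReportStatistics(
        instance, request, true /* success */, exec_start_ns, compute_start_ns,
        compute_end_ns, exec_end_ns);
    if (err != nullptr) {
      TRITONSERVER_ErrorDelete(err);  // reporting failure is non-fatal here
    }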
@@ -1356,6 +1358,156 @@ TRITONBACKEND_ModelInstanceReportStatistics(
1356 | 1358 |     const uint64_t compute_start_ns, const uint64_t compute_end_ns,
1357 | 1359 |     const uint64_t exec_end_ns);
1358 | 1360 |
     | 1361 | +/// Create a new inference response statistics object.
     | 1362 | +///
     | 1363 | +/// \param response_statistics The new response statistics object to be created.
     | 1364 | +/// \return a TRITONSERVER_Error indicating success or failure.
     | 1365 | +TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
     | 1366 | +TRITONBACKEND_ModelInstanceResponseStatisticsNew(
     | 1367 | +    TRITONBACKEND_ModelInstanceResponseStatistics** response_statistics);
     | 1368 | +
     | 1369 | +/// Delete an inference response statistics object.
     | 1370 | +///
     | 1371 | +/// The caller retains ownership of the objects set on the deleted response
     | 1372 | +/// statistics object and must free them separately.
     | 1373 | +///
     | 1374 | +/// \param response_statistics The response statistics object to be deleted.
     | 1375 | +/// \return a TRITONSERVER_Error indicating success or failure.
     | 1376 | +TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
     | 1377 | +TRITONBACKEND_ModelInstanceResponseStatisticsDelete(
     | 1378 | +    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics);
     | 1379 | +
     | 1380 | +/// Set the model instance on an inference response statistics object.
     | 1381 | +///
     | 1382 | +/// \param response_statistics The response statistics object.
     | 1383 | +/// \param model_instance The model instance.
     | 1384 | +/// \return a TRITONSERVER_Error indicating success or failure.
     | 1385 | +TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
     | 1386 | +TRITONBACKEND_ModelInstanceResponseStatisticsSetModelInstance(
     | 1387 | +    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
     | 1388 | +    TRITONBACKEND_ModelInstance* model_instance);
     | 1389 | +
     | 1390 | +/// Set the response factory on an inference response statistics object.
     | 1391 | +///
     | 1392 | +/// \param response_statistics The response statistics object.
     | 1393 | +/// \param response_factory The response factory.
     | 1394 | +/// \return a TRITONSERVER_Error indicating success or failure.
     | 1395 | +TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
     | 1396 | +TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseFactory(
     | 1397 | +    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
     | 1398 | +    TRITONBACKEND_ResponseFactory* response_factory);
     | 1399 | +
     | 1400 | +/// Set the response start time on an inference response statistics object.
     | 1401 | +///
     | 1402 | +/// All timestamps should be reported in nanoseconds and collected using
     | 1403 | +/// std::chrono::steady_clock::now().time_since_epoch() or the equivalent.
     | 1404 | +///
     | 1405 | +/// For consistency of measurement across different backends, the timestamps
     | 1406 | +/// should be collected at the following points during
     | 1407 | +/// TRITONBACKEND_ModelInstanceExecute.
     | 1408 | +///
     | 1409 | +///   TRITONBACKEND_ModelInstanceExecute()
     | 1410 | +///     < start of this response >
     | 1411 | +///     CAPTURE TIMESTAMP (response_start)
     | 1412 | +///     < generate this response >
     | 1413 | +///     CAPTURE TIMESTAMP (compute_output_start)
     | 1414 | +///     < allocate output buffers and extract output tensors, including copying
     | 1415 | +///       the tensors to/from GPU if necessary >
     | 1416 | +///     CAPTURE TIMESTAMP (response_end)
     | 1417 | +///     < end of this response >
     | 1418 | +///   return
     | 1419 | +///
     | 1420 | +/// \param response_statistics The response statistics object.
     | 1421 | +/// \param response_start The response start time.
     | 1422 | +/// \return a TRITONSERVER_Error indicating success or failure.
     | 1423 | +TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
     | 1424 | +TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseStart(
     | 1425 | +    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
     | 1426 | +    uint64_t response_start);
     | 1427 | +
     | 1428 | +/// Set the compute output start time on an inference response statistics object.
     | 1429 | +///
     | 1430 | +/// Do NOT set the compute output start time (or set it to 0) when reporting
     | 1431 | +/// an empty response.
     | 1432 | +///
     | 1433 | +/// All timestamps should be reported in nanoseconds and collected using
     | 1434 | +/// std::chrono::steady_clock::now().time_since_epoch() or the equivalent.
     | 1435 | +///
     | 1436 | +/// For consistency of measurement across different backends, the timestamps
     | 1437 | +/// should be collected at the following points during
     | 1438 | +/// TRITONBACKEND_ModelInstanceExecute.
     | 1439 | +///
     | 1440 | +///   TRITONBACKEND_ModelInstanceExecute()
     | 1441 | +///     < start of this response >
     | 1442 | +///     CAPTURE TIMESTAMP (response_start)
     | 1443 | +///     < generate this response >
     | 1444 | +///     CAPTURE TIMESTAMP (compute_output_start)
     | 1445 | +///     < allocate output buffers and extract output tensors, including copying
     | 1446 | +///       the tensors to/from GPU if necessary >
     | 1447 | +///     CAPTURE TIMESTAMP (response_end)
     | 1448 | +///     < end of this response >
     | 1449 | +///   return
     | 1450 | +///
     | 1451 | +/// \param response_statistics The response statistics object.
     | 1452 | +/// \param compute_output_start The compute output start time.
     | 1453 | +/// \return a TRITONSERVER_Error indicating success or failure.
     | 1454 | +TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
     | 1455 | +TRITONBACKEND_ModelInstanceResponseStatisticsSetComputeOutputStart(
     | 1456 | +    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
     | 1457 | +    uint64_t compute_output_start);
     | 1458 | +
     | 1459 | +/// Set the response end time on an inference response statistics object.
     | 1460 | +///
     | 1461 | +/// All timestamps should be reported in nanoseconds and collected using
     | 1462 | +/// std::chrono::steady_clock::now().time_since_epoch() or the equivalent.
     | 1463 | +///
     | 1464 | +/// For consistency of measurement across different backends, the timestamps
     | 1465 | +/// should be collected at the following points during
     | 1466 | +/// TRITONBACKEND_ModelInstanceExecute.
     | 1467 | +///
     | 1468 | +///   TRITONBACKEND_ModelInstanceExecute()
     | 1469 | +///     < start of this response >
     | 1470 | +///     CAPTURE TIMESTAMP (response_start)
     | 1471 | +///     < generate this response >
     | 1472 | +///     CAPTURE TIMESTAMP (compute_output_start)
     | 1473 | +///     < allocate output buffers and extract output tensors, including copying
     | 1474 | +///       the tensors to/from GPU if necessary >
     | 1475 | +///     CAPTURE TIMESTAMP (response_end)
     | 1476 | +///     < end of this response >
     | 1477 | +///   return
     | 1478 | +///
     | 1479 | +/// \param response_statistics The response statistics object.
     | 1480 | +/// \param response_end The response end time.
     | 1481 | +/// \return a TRITONSERVER_Error indicating success or failure.
     | 1482 | +TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
     | 1483 | +TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseEnd(
     | 1484 | +    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
     | 1485 | +    uint64_t response_end);
     | 1486 | +
     | 1487 | +/// Set the error on an inference response statistics object.
     | 1488 | +///
     | 1489 | +/// Use the same error object passed to TRITONBACKEND_ResponseSend. \see
     | 1490 | +/// TRITONBACKEND_ResponseSend.
     | 1491 | +///
     | 1492 | +/// \param response_statistics The response statistics object.
     | 1493 | +/// \param error The error object.
     | 1494 | +/// \return a TRITONSERVER_Error indicating success or failure.
     | 1495 | +TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
     | 1496 | +TRITONBACKEND_ModelInstanceResponseStatisticsSetError(
     | 1497 | +    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
     | 1498 | +    TRITONSERVER_Error* error);
     | 1499 | +
     | 1500 | +/// Record statistics for an inference response.
     | 1501 | +///
     | 1502 | +/// The caller retains ownership of the response statistics object and must
     | 1503 | +/// free it after this function returns.
     | 1504 | +///
     | 1505 | +/// \param response_statistics The statistics to be recorded.
     | 1506 | +/// \return a TRITONSERVER_Error indicating success or failure.
     | 1507 | +TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
     | 1508 | +TRITONBACKEND_ModelInstanceReportResponseStatistics(
     | 1509 | +    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics);
     | 1510 | +
1359 | 1511 | /// Record statistics for the execution of an entire batch of
1360 | 1512 | /// inference requests.
1361 | 1513 | ///
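
Taken together, the new entry points follow a create/set/report/delete
lifecycle per response. A sketch for one decoupled response, assuming
`instance`, `factory`, and the captured `*_ns` timestamps come from the
surrounding execute call; per-call error checking is elided for brevity:

    // Sketch: report statistics for a single response, then free the object.
    TRITONBACKEND_ModelInstanceResponseStatistics* stats = nullptr;
    TRITONBACKEND_ModelInstanceResponseStatisticsNew(&stats);
    TRITONBACKEND_ModelInstanceResponseStatisticsSetModelInstance(
        stats, instance);
    TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseFactory(
        stats, factory);
    TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseStart(
        stats, response_start_ns);
    // Skip this setter (or pass 0) when reporting an empty response.
    TRITONBACKEND_ModelInstanceResponseStatisticsSetComputeOutputStart(
        stats, compute_output_start_ns);
    TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseEnd(
        stats, response_end_ns);
    // For an error response, attach the same error object passed to
    // TRITONBACKEND_ResponseSend:
    //   TRITONBACKEND_ModelInstanceResponseStatisticsSetError(stats, error);
    TRITONBACKEND_ModelInstanceReportResponseStatistics(stats);
    // The backend still owns `stats` (and everything set on it).
    TRITONBACKEND_ModelInstanceResponseStatisticsDelete(stats);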
|