|
| 1 | +// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
| 2 | +// |
| 3 | +// Redistribution and use in source and binary forms, with or without |
| 4 | +// modification, are permitted provided that the following conditions |
| 5 | +// are met: |
| 6 | +// * Redistributions of source code must retain the above copyright |
| 7 | +// notice, this list of conditions and the following disclaimer. |
| 8 | +// * Redistributions in binary form must reproduce the above copyright |
| 9 | +// notice, this list of conditions and the following disclaimer in the |
| 10 | +// documentation and/or other materials provided with the distribution. |
| 11 | +// * Neither the name of NVIDIA CORPORATION nor the names of its |
| 12 | +// contributors may be used to endorse or promote products derived |
| 13 | +// from this software without specific prior written permission. |
| 14 | +// |
| 15 | +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY |
| 16 | +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 17 | +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 18 | +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
| 19 | +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 20 | +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 21 | +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 22 | +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 23 | +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 24 | +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 25 | +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 26 | +#pragma once |
| 27 | + |
| 28 | +#include <stddef.h> |
| 29 | +#include <stdint.h> |
| 30 | +#include "triton/core/tritonserver.h" |
| 31 | + |
| 32 | +#ifdef __cplusplus |
| 33 | +extern "C" { |
| 34 | +#endif |
| 35 | + |
| 36 | +#ifdef _COMPILING_TRITONCACHE |
| 37 | +#if defined(_MSC_VER) |
| 38 | +#define TRITONCACHE_DECLSPEC __declspec(dllexport) |
| 39 | +#define TRITONCACHE_ISPEC __declspec(dllimport) |
| 40 | +#elif defined(__GNUC__) |
| 41 | +#define TRITONCACHE_DECLSPEC __attribute__((__visibility__("default"))) |
| 42 | +#define TRITONCACHE_ISPEC |
| 43 | +#else |
| 44 | +#define TRITONCACHE_DECLSPEC |
| 45 | +#define TRITONCACHE_ISPEC |
| 46 | +#endif |
| 47 | +#else |
| 48 | +#if defined(_MSC_VER) |
| 49 | +#define TRITONCACHE_DECLSPEC __declspec(dllimport) |
| 50 | +#define TRITONCACHE_ISPEC __declspec(dllexport) |
| 51 | +#else |
| 52 | +#define TRITONCACHE_DECLSPEC |
| 53 | +#define TRITONCACHE_ISPEC |
| 54 | +#endif |
| 55 | +#endif |
| 56 | + |
| 57 | +struct TRITONCACHE_Cache; |
| 58 | +struct TRITONCACHE_CacheEntry; |
| 59 | +struct TRITONCACHE_Allocator; |
| 60 | + |
| 61 | +/// |
| 62 | +/// TRITONCACHE API Version |
| 63 | +/// |
| 64 | +/// The TRITONCACHE API is versioned with major and minor version |
| 65 | +/// numbers. Any change to the API that does not impact backwards |
| 66 | +/// compatibility (for example, adding a non-required function) |
| 67 | +/// increases the minor version number. Any change that breaks |
| 68 | +/// backwards compatibility (for example, deleting or changing the |
| 69 | +/// behavior of a function) increases the major version number. A |
| 70 | +/// cache implementation should check that the API version used to compile |
| 71 | +/// the cache is compatible with the API version of the Triton server |
| 72 | +/// that it is running in. This is typically done by code similar to |
| 73 | +/// the following which makes sure that the major versions are equal |
| 74 | +/// and that the minor version of Triton is >= the minor version used |
| 75 | +/// to build the cache. |
| 76 | +/// |
| 77 | +/// uint32_t api_version_major, api_version_minor; |
| 78 | +/// TRITONCACHE_ApiVersion(&api_version_major, &api_version_minor); |
| 79 | +/// if ((api_version_major != TRITONCACHE_API_VERSION_MAJOR) || |
| 80 | +/// (api_version_minor < TRITONCACHE_API_VERSION_MINOR)) { |
| 81 | +/// return TRITONSERVER_ErrorNew( |
| 82 | +/// TRITONSERVER_ERROR_UNSUPPORTED, |
| 83 | +/// "triton cache API version does not support this cache"); |
| 84 | +/// } |
| 85 | +/// |
| 86 | +#define TRITONCACHE_API_VERSION_MAJOR 0 |
| 87 | +#define TRITONCACHE_API_VERSION_MINOR 1 |
| 88 | + |
| 89 | +/// Get the TRITONCACHE API version supported by Triton. This |
| 90 | +/// value can be compared against the |
| 91 | +/// TRITONCACHE_API_VERSION_MAJOR and |
| 92 | +/// TRITONCACHE_API_VERSION_MINOR used to build the cache to |
| 93 | +/// ensure that Triton is compatible with the cache. |
| 94 | +/// |
| 95 | +/// \param major Returns the TRITONCACHE API major version supported |
| 96 | +/// by Triton. |
| 97 | +/// \param minor Returns the TRITONCACHE API minor version supported |
| 98 | +/// by Triton. |
| 99 | +/// \return a TRITONSERVER_Error indicating success or failure. |
| 100 | +TRITONCACHE_DECLSPEC TRITONSERVER_Error* TRITONCACHE_ApiVersion( |
| 101 | + uint32_t* major, uint32_t* minor); |
| 102 | + |
| 103 | +/// Get the number of buffers held by entry |
| 104 | +/// |
| 105 | +/// \param entry The CacheEntry object to query. |
| 106 | +/// \param count Returns the number of buffers in entry. |
| 107 | +/// \return a TRITONSERVER_Error indicating success or failure. |
| 108 | +TRITONCACHE_DECLSPEC TRITONSERVER_Error* TRITONCACHE_CacheEntryBufferCount( |
| 109 | + TRITONCACHE_CacheEntry* entry, size_t* count); |
| 110 | + |
| 111 | +/// Adds buffer to entry. |
| 112 | +/// |
| 113 | +/// NOTE: (DLIS-2673) Only buffers in CPU memory supported currently. |
| 114 | +/// |
| 115 | +/// \param entry The CacheEntry object to add buffer to. |
| 116 | +/// \param base The base address of the buffer to add. |
| 117 | +/// \param buffer_attributes The buffer attributes associated with the buffer. |
| 118 | +/// The caller must create the buffer attributes object, and set the relevant |
| 119 | +/// fields through the BufferAttributes API. |
| 120 | +/// \return a TRITONSERVER_Error indicating success or failure. |
| 121 | +TRITONCACHE_DECLSPEC TRITONSERVER_Error* TRITONCACHE_CacheEntryAddBuffer( |
| 122 | + TRITONCACHE_CacheEntry* entry, void* base, |
| 123 | + TRITONSERVER_BufferAttributes* buffer_attributes); |
| 124 | + |
| 125 | +/// Gets the buffer at index from entry. |
| 126 | +/// |
| 127 | +/// The caller does not own the returned buffer and must not modify or delete |
| 128 | +/// it. The lifetime of the buffer extends until 'entry' is deleted. If the |
| 129 | +/// buffer needs to persist long term, the caller should make a copy. |
| 130 | +/// |
| 131 | +/// NOTE: Currently in the context of Triton, this API is used for the cache |
| 132 | +/// implementation to access the buffers from the opaque entry object passed by |
| 133 | +/// Triton in TRITONCACHE_CacheInsert. It is expected that the cache |
| 134 | +/// will get the buffer, and perform any necessary copy within the |
| 135 | +/// TRITONCACHE_CacheInsert implementation. After TRITONCACHE_CacheInsert |
| 136 | +/// returns, there is no guarantee that Triton won't delete the entry holding |
| 137 | +/// the buffer. This is also why the caller is expected to create and own the |
| 138 | +/// BufferAttributes object, as a copy would be needed otherwise anyway. |
| 139 | +/// |
| 140 | +/// \param entry The CacheEntry object owning the buffer. |
| 141 | +/// \param index The index of the buffer, must be 0 <= index < count, where |
| 142 | +/// 'count' is the value returned by TRITONCACHE_CacheEntryBufferCount. |
| 143 | +/// \param base The base address of the buffer at index that is returned. |
| 144 | +/// \param buffer_attributes The buffer attributes associated with the buffer. |
| 145 | +/// The caller must create the buffer attributes object, then Triton will |
| 146 | +/// internally set the relevant fields on this object through the |
| 147 | +/// BufferAttributes API. |
| 148 | +/// \return a TRITONSERVER_Error indicating success or failure. |
| 149 | +TRITONCACHE_DECLSPEC TRITONSERVER_Error* TRITONCACHE_CacheEntryGetBuffer( |
| 150 | + TRITONCACHE_CacheEntry* entry, size_t index, void** base, |
| 151 | + TRITONSERVER_BufferAttributes* buffer_attributes); |
| 152 | + |
| 153 | +/// Sets the buffer at index in entry. |
| 154 | +/// |
| 155 | +/// The entry does not own the buffer and will generally not modify or delete |
| 156 | +/// it. |
| 157 | +/// |
| 158 | +/// NOTE: Currently in the context of Triton, this API is used to allow the |
| 159 | +/// cache implementation to allocate/provide buffers for Triton to copy |
| 160 | +/// directly into in TRITONCACHE_CacheInsert to avoid intermediate copies. |
| 161 | +/// |
| 162 | +/// \param entry The CacheEntry object owning the buffer. |
| 163 | +/// \param index The index of the buffer, must be 0 <= index < count, where |
| 164 | +/// 'count' is the value returned by TRITONCACHE_CacheEntryBufferCount. |
| 165 | +/// \param new_base The base address of the new buffer to set at index. |
| 166 | +/// \param buffer_attributes (optional) buffer attributes associated with the |
| 167 | +/// buffer to overwrite existing attributes. If the entry already has a buffer |
| 168 | +/// with the same attributes, there is no need to provide a new one. |
| 169 | +/// \return a TRITONSERVER_Error indicating success or failure. |
| 170 | +TRITONCACHE_DECLSPEC TRITONSERVER_Error* TRITONCACHE_CacheEntrySetBuffer( |
| 171 | + TRITONCACHE_CacheEntry* entry, size_t index, void* new_base, |
| 172 | + TRITONSERVER_BufferAttributes* buffer_attributes); |
| 173 | + |
| 174 | +/// Callback that can be used with a custom allocator to prepare buffers, |
| 175 | +/// and copy to/from the entry. |
| 176 | +/// |
| 177 | +/// For example: |
| 178 | +/// TRITONCACHE_CacheLookup: |
| 179 | +/// The cache can provide cache-allocated buffers directly in the entry |
| 180 | +/// object, and can use this callback + allocator to allocate buffers |
| 181 | +/// on the Triton side that require some cache metadata before allocation |
| 182 | +/// (ex: size of cached data). After, Triton can copy directly from the |
| 183 | +/// cache buffers into the Triton-allocated buffers. |
| 184 | +/// TRITONCACHE_CacheInsert: |
| 185 | +/// The cache can provide cache-allocated buffers directly in the entry |
| 186 | +/// object through the TRITONCACHE_CacheEntrySetBuffer API, then use this |
| 187 | +/// callback to copy buffers from Triton into the cache-allocated buffers. |
| 188 | +/// |
| 189 | +/// \param allocator Allocator that prepares buffers to copy to or from. |
| 190 | +/// \param entry The entry containing buffers and buffer attributes to copy to or from. |
| 191 | +TRITONCACHE_DECLSPEC TRITONSERVER_Error* TRITONCACHE_Copy( |
| 192 | + TRITONCACHE_Allocator* allocator, TRITONCACHE_CacheEntry* entry); |
| 193 | + |
| 194 | +/// |
| 195 | +/// The following functions can be implemented by a cache. Functions |
| 196 | +/// indicated as required must be implemented or the cache will fail |
| 197 | +/// to load. |
| 198 | +/// |
| 199 | + |
| 200 | +/// Initialize a new cache object. |
| 201 | +/// |
| 202 | +/// This function is required to be implemented by the cache. |
| 203 | +/// |
| 204 | +/// The caller takes ownership of the |
| 205 | +/// TRITONCACHE_Cache object and must call |
| 206 | +/// TRITONCACHE_CacheFinalize to cleanup and release the object. |
| 207 | +/// |
| 208 | +/// This API is implemented by the user-provided cache implementation, |
| 209 | +/// so specific details will be found within the cache implementation. |
| 210 | +/// |
| 211 | +/// \param cache Returns the new cache object. |
| 212 | +/// \param config The config options to initialize the cache with. |
| 213 | +/// This will be passed as-is to the cache implementation, so |
| 214 | +/// the expected format and parsing is up to the cache as well. |
| 215 | +/// \return a TRITONSERVER_Error indicating success or failure. |
| 216 | +TRITONCACHE_ISPEC TRITONSERVER_Error* TRITONCACHE_CacheInitialize( |
| 217 | + TRITONCACHE_Cache** cache, const char* config); |
| 218 | + |
| 219 | +/// Cleanup a cache object. |
| 220 | +/// |
| 221 | +/// This function is required to be implemented by the cache. |
| 222 | +/// |
| 223 | +/// This API is implemented by the user-provided cache implementation, |
| 224 | +/// so specific details will be found within the cache implementation. |
| 225 | +/// |
| 226 | +/// \param cache The cache object to delete. |
| 227 | +/// \return a TRITONSERVER_Error indicating success or failure. |
| 228 | +TRITONCACHE_ISPEC TRITONSERVER_Error* TRITONCACHE_CacheFinalize( |
| 229 | + TRITONCACHE_Cache* cache); |
| 230 | + |
| 231 | +/// Inserts entry into cache at specified key. Typically this will fail |
| 232 | +/// if the key already exists, but a cache implementation may decide to allow |
| 233 | +/// overwriting entries for existing keys. |
| 234 | +/// |
| 235 | +/// This function is required to be implemented by the cache. |
| 236 | +/// |
| 237 | +/// This API is implemented by the user-provided cache implementation, |
| 238 | +/// so specific details will be found within the cache implementation. |
| 239 | +/// |
| 240 | +/// \param cache The object that is used to communicate with the cache |
| 241 | +/// implementation through a shared library. |
| 242 | +/// \param key The key used to access the cache. Generally, this is some |
| 243 | +/// unique value or hash representing the entry to avoid collisions. |
| 244 | +/// \param entry The entry to be inserted into the cache. |
| 245 | +/// \param allocator TritonCacheAllocator that is used to copy data directly |
| 246 | +/// into cache-provided buffers to avoid intermediate copies. |
| 247 | +/// The cache implementation expects that the entry will hold |
| 248 | +/// the requested buffer sizes. Then the cache implementation |
| 249 | +/// is expected to set the buffer addresses in the entry to |
| 250 | +/// the cache-allocated buffers through the |
| 251 | +/// TRITONCACHE_CacheEntrySetBuffer API. |
| 252 | +/// \return a TRITONSERVER_Error indicating success or failure. |
| 253 | +/// Specific errors will be up to the cache implementation, but general |
| 254 | +/// error best practices that should be followed are as follows: |
| 255 | +/// - TRITONSERVER_ERROR_INVALID_ARG |
| 256 | +/// - bad argument passed, nullptr, etc. |
| 257 | +/// - TRITONSERVER_ERROR_ALREADY_EXISTS |
| 258 | +/// - key already exists and will not be inserted again |
| 259 | +/// - TRITONSERVER_ERROR_INTERNAL |
| 260 | +/// - internal errors |
| 261 | +/// - nullptr |
| 262 | +/// - success |
| 263 | +TRITONCACHE_ISPEC TRITONSERVER_Error* TRITONCACHE_CacheInsert( |
| 264 | + TRITONCACHE_Cache* cache, const char* key, TRITONCACHE_CacheEntry* entry, |
| 265 | + TRITONCACHE_Allocator* allocator); |
| 266 | + |
| 267 | +/// Retrieves entry from cache at specified key, if key exists. |
| 268 | +/// |
| 269 | +/// This function is required to be implemented by the cache. |
| 270 | +/// |
| 271 | +/// This API is implemented by the user-provided cache implementation, |
| 272 | +/// so specific details will be found within the cache implementation. |
| 273 | +/// |
| 274 | +/// \param cache The object that is used to communicate with the cache |
| 275 | +/// implementation through a shared library. |
| 276 | +/// \param key The key used to access the cache. Generally, this is some |
| 277 | +/// unique value or hash representing the entry to avoid collisions. |
| 278 | +/// \param entry The entry to be retrieved from the cache. |
| 279 | +/// \param allocator TritonCacheAllocator that is used to copy cache data into |
| 280 | +/// user-provided buffers to avoid intermediate copies. |
| 281 | +/// \return a TRITONSERVER_Error indicating success or failure. |
| 282 | +/// Specific errors will be up to the cache implementation, but general |
| 283 | +/// error best practices that should be followed are as follows: |
| 284 | +/// - TRITONSERVER_ERROR_INVALID_ARG |
| 285 | +/// - bad argument passed, nullptr, etc. |
| 286 | +/// - TRITONSERVER_ERROR_NOT_FOUND |
| 287 | +/// - key not found in cache |
| 288 | +/// - TRITONSERVER_ERROR_INTERNAL |
| 289 | +/// - other internal errors |
| 290 | +/// - nullptr |
| 291 | +/// - success |
| 292 | +TRITONCACHE_ISPEC TRITONSERVER_Error* TRITONCACHE_CacheLookup( |
| 293 | + TRITONCACHE_Cache* cache, const char* key, TRITONCACHE_CacheEntry* entry, |
| 294 | + TRITONCACHE_Allocator* allocator); |
| 295 | + |
| 296 | +#ifdef __cplusplus |
| 297 | +} // extern C |
| 298 | +#endif |
0 commit comments