Skip to content

Commit f59101d

Browse files
authored
Add TRITONCACHE APIs, CacheManager, CacheEntry (#171)
1 parent b6070e2 commit f59101d

28 files changed

+2626
-1468
lines changed

CMakeLists.txt

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
#
33
# Redistribution and use in source and binary forms, with or without
44
# modification, are permitted provided that the following conditions
@@ -88,6 +88,26 @@ target_include_directories(
8888
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
8989
)
9090

91+
#
92+
# Triton Cache API
93+
#
94+
add_library(
95+
triton-core-cacheapi INTERFACE
96+
)
97+
98+
add_library(
99+
TritonCore::triton-core-cacheapi ALIAS triton-core-cacheapi
100+
)
101+
102+
target_include_directories(
103+
triton-core-cacheapi
104+
INTERFACE
105+
$<INSTALL_INTERFACE:include>
106+
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
107+
)
108+
109+
110+
91111
#
92112
# Stub library for libtritonserver.so that stubs Triton Server API and
93113
# Triton Backend API
@@ -106,6 +126,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
106126
message("Using MSVC as compiler, default target on Windows 10. "
107127
"If the target system is not Windows 10, please update _WIN32_WINNT "
108128
"to corresponding value.")
129+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /std:c++17")
109130
target_compile_options(
110131
triton-core-serverstub
111132
PRIVATE
@@ -310,6 +331,7 @@ install(
310331
TARGETS
311332
triton-core-backendapi
312333
triton-core-repoagentapi
334+
triton-core-cacheapi
313335
triton-core-serverapi
314336
EXPORT
315337
triton-core-targets

include/triton/core/tritoncache.h

Lines changed: 298 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,298 @@
1+
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
//
3+
// Redistribution and use in source and binary forms, with or without
4+
// modification, are permitted provided that the following conditions
5+
// are met:
6+
// * Redistributions of source code must retain the above copyright
7+
// notice, this list of conditions and the following disclaimer.
8+
// * Redistributions in binary form must reproduce the above copyright
9+
// notice, this list of conditions and the following disclaimer in the
10+
// documentation and/or other materials provided with the distribution.
11+
// * Neither the name of NVIDIA CORPORATION nor the names of its
12+
// contributors may be used to endorse or promote products derived
13+
// from this software without specific prior written permission.
14+
//
15+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
#pragma once
27+
28+
#include <stddef.h>
29+
#include <stdint.h>
30+
#include "triton/core/tritonserver.h"
31+
32+
#ifdef __cplusplus
33+
extern "C" {
34+
#endif
35+
36+
#ifdef _COMPILING_TRITONCACHE
37+
#if defined(_MSC_VER)
38+
#define TRITONCACHE_DECLSPEC __declspec(dllexport)
39+
#define TRITONCACHE_ISPEC __declspec(dllimport)
40+
#elif defined(__GNUC__)
41+
#define TRITONCACHE_DECLSPEC __attribute__((__visibility__("default")))
42+
#define TRITONCACHE_ISPEC
43+
#else
44+
#define TRITONCACHE_DECLSPEC
45+
#define TRITONCACHE_ISPEC
46+
#endif
47+
#else
48+
#if defined(_MSC_VER)
49+
#define TRITONCACHE_DECLSPEC __declspec(dllimport)
50+
#define TRITONCACHE_ISPEC __declspec(dllexport)
51+
#else
52+
#define TRITONCACHE_DECLSPEC
53+
#define TRITONCACHE_ISPEC
54+
#endif
55+
#endif
56+
57+
struct TRITONCACHE_Cache;
58+
struct TRITONCACHE_CacheEntry;
59+
struct TRITONCACHE_Allocator;
60+
61+
///
62+
/// TRITONCACHE API Version
63+
///
64+
/// The TRITONCACHE API is versioned with major and minor version
65+
/// numbers. Any change to the API that does not impact backwards
66+
/// compatibility (for example, adding a non-required function)
67+
/// increases the minor version number. Any change that breaks
68+
/// backwards compatibility (for example, deleting or changing the
69+
/// behavior of a function) increases the major version number. A
70+
/// cache implementation should check that the API version used to compile
71+
/// the cache is compatible with the API version of the Triton server
72+
/// that it is running in. This is typically done by code similar to
73+
/// the following which makes sure that the major versions are equal
74+
/// and that the minor version of Triton is >= the minor version used
75+
/// to build the cache.
76+
///
77+
/// uint32_t api_version_major, api_version_minor;
78+
/// TRITONCACHE_ApiVersion(&api_version_major, &api_version_minor);
79+
/// if ((api_version_major != TRITONCACHE_API_VERSION_MAJOR) ||
80+
/// (api_version_minor < TRITONCACHE_API_VERSION_MINOR)) {
81+
/// return TRITONSERVER_ErrorNew(
82+
/// TRITONSERVER_ERROR_UNSUPPORTED,
83+
/// "triton cache API version does not support this cache");
84+
/// }
85+
///
86+
#define TRITONCACHE_API_VERSION_MAJOR 0
87+
#define TRITONCACHE_API_VERSION_MINOR 1
88+
89+
/// Get the TRITONCACHE API version supported by Triton. This
90+
/// value can be compared against the
91+
/// TRITONCACHE_API_VERSION_MAJOR and
92+
/// TRITONCACHE_API_VERSION_MINOR used to build the cache to
93+
/// ensure that Triton is compatible with the cache.
94+
///
95+
/// \param major Returns the TRITONCACHE API major version supported
96+
/// by Triton.
97+
/// \param minor Returns the TRITONCACHE API minor version supported
98+
/// by Triton.
99+
/// \return a TRITONSERVER_Error indicating success or failure.
100+
TRITONCACHE_DECLSPEC TRITONSERVER_Error* TRITONCACHE_ApiVersion(
101+
uint32_t* major, uint32_t* minor);
102+
103+
/// Get the number of buffers held by entry
104+
///
105+
/// \param entry The CacheEntry object to query.
106+
/// \param count Returns the number of buffers in entry.
107+
/// \return a TRITONSERVER_Error indicating success or failure.
108+
TRITONCACHE_DECLSPEC TRITONSERVER_Error* TRITONCACHE_CacheEntryBufferCount(
109+
TRITONCACHE_CacheEntry* entry, size_t* count);
110+
111+
/// Adds buffer to entry.
112+
///
113+
/// NOTE: (DLIS-2673) Only buffers in CPU memory supported currently.
114+
///
115+
/// \param entry The CacheEntry object to add buffer to.
116+
/// \param base The base address of the buffer to add.
117+
/// \param buffer_attributes The buffer attributes associated with the buffer.
118+
/// The caller must create the buffer attributes object, and set the relevant
119+
/// fields through the BufferAttributes API.
120+
/// \return a TRITONSERVER_Error indicating success or failure.
121+
TRITONCACHE_DECLSPEC TRITONSERVER_Error* TRITONCACHE_CacheEntryAddBuffer(
122+
TRITONCACHE_CacheEntry* entry, void* base,
123+
TRITONSERVER_BufferAttributes* buffer_attributes);
124+
125+
/// Gets the buffer at index from entry.
126+
///
127+
/// The caller does not own the returned buffer and must not modify or delete
128+
/// it. The lifetime of the buffer extends until 'entry' is deleted. If the
129+
/// buffer needs to persist long term, the caller should make a copy.
130+
///
131+
/// NOTE: Currently in the context of Triton, this API is used for the cache
132+
/// implementation to access the buffers from the opaque entry object passed by
133+
/// Triton in TRITONCACHE_CacheInsert. It is expected that the cache
134+
/// will get the buffer, and perform any necessary copy within the
135+
/// TRITONCACHE_CacheInsert implementation. After TRITONCACHE_CacheInsert
136+
/// returns, there is no guarantee that Triton won't delete the entry holding
137+
/// the buffer. This is also why the caller is expected to create and own the
138+
/// BufferAttributes object, as a copy would be needed otherwise anyway.
139+
///
140+
/// \param entry The CacheEntry object owning the buffer.
141+
/// \param index The index of the buffer, must be 0 <= index < count, where
142+
/// 'count' is the value returned by TRITONCACHE_CacheEntryBufferCount.
143+
/// \param base The base address of the buffer at index that is returned.
144+
/// \param buffer_attributes The buffer attributes associated with the buffer.
145+
/// The caller must create the buffer attributes object, then Triton will
146+
/// internally set the relevant fields on this object through the
147+
/// BufferAttributes API.
148+
/// \return a TRITONSERVER_Error indicating success or failure.
149+
TRITONCACHE_DECLSPEC TRITONSERVER_Error* TRITONCACHE_CacheEntryGetBuffer(
150+
TRITONCACHE_CacheEntry* entry, size_t index, void** base,
151+
TRITONSERVER_BufferAttributes* buffer_attributes);
152+
153+
/// Sets the buffer at index in entry.
154+
///
155+
/// The entry does not own the buffer and will generally not modify or delete
156+
/// it.
157+
///
158+
/// NOTE: Currently in the context of Triton, this API is used to allow the
159+
/// cache implementation to allocate/provide buffers for Triton to copy
160+
/// directly into in TRITONCACHE_CacheInsert to avoid intermediate copies.
161+
///
162+
/// \param entry The CacheEntry object in which to set the buffer.
163+
/// \param index The index of the buffer, must be 0 <= index < count, where
164+
/// 'count' is the value returned by TRITONCACHE_CacheEntryBufferCount.
165+
/// \param new_base The base address of the new buffer to set at index.
166+
/// \param buffer_attributes (optional) buffer attributes associated with the
167+
/// buffer to overwrite existing attributes. If the entry already has a buffer
168+
/// with the same attributes, there is no need to provide a new one.
169+
/// \return a TRITONSERVER_Error indicating success or failure.
170+
TRITONCACHE_DECLSPEC TRITONSERVER_Error* TRITONCACHE_CacheEntrySetBuffer(
171+
TRITONCACHE_CacheEntry* entry, size_t index, void* new_base,
172+
TRITONSERVER_BufferAttributes* buffer_attributes);
173+
174+
/// Callback that can be used with a custom allocator to prepare buffers,
175+
/// and copy to/from the entry.
176+
///
177+
/// For example:
178+
/// TRITONCACHE_CacheLookup:
179+
/// The cache can provide cache-allocated buffers directly in the entry
180+
/// object, and can use this callback + allocator to allocate buffers
181+
/// on the Triton side that require some cache metadata before allocation
182+
/// (ex: size of cached data). After, Triton can copy directly from the
183+
/// cache buffers into the Triton-allocated buffers.
184+
/// TRITONCACHE_CacheInsert:
185+
/// The cache can provide cache-allocated buffers directly in the entry
186+
/// object through the TRITONCACHE_CacheEntrySetBuffer API, then use this
187+
/// callback to copy buffers from Triton into the cache-allocated buffers.
188+
///
189+
/// \param allocator Allocator that prepares buffers to copy to or from.
190+
/// \param entry The entry containing buffers and buffer attributes to copy from
191+
TRITONCACHE_DECLSPEC TRITONSERVER_Error* TRITONCACHE_Copy(
192+
TRITONCACHE_Allocator* allocator, TRITONCACHE_CacheEntry* entry);
193+
194+
///
195+
/// The following functions can be implemented by a cache. Functions
196+
/// indicated as required must be implemented or the cache will fail
197+
/// to load.
198+
///
199+
200+
/// Initialize a new cache object.
201+
///
202+
/// This function is required to be implemented by the cache.
203+
///
204+
/// The caller takes ownership of the
205+
/// TRITONCACHE_Cache object and must call
206+
/// TRITONCACHE_CacheFinalize to cleanup and release the object.
207+
///
208+
/// This API is implemented by the user-provided cache implementation,
209+
/// so specific details will be found within the cache implementation.
210+
///
211+
/// \param cache Returns the new cache object.
212+
/// \param config The config options to initialize the cache with.
213+
/// This will be passed as-is to the cache implementation, so
214+
/// the expected format and parsing is up to the cache as well.
215+
/// \return a TRITONSERVER_Error indicating success or failure.
216+
TRITONCACHE_ISPEC TRITONSERVER_Error* TRITONCACHE_CacheInitialize(
217+
TRITONCACHE_Cache** cache, const char* config);
218+
219+
/// Cleanup a cache object.
220+
///
221+
/// This function is required to be implemented by the cache.
222+
///
223+
/// This API is implemented by the user-provided cache implementation,
224+
/// so specific details will be found within the cache implementation.
225+
///
226+
/// \param cache The cache object to delete.
227+
/// \return a TRITONSERVER_Error indicating success or failure.
228+
TRITONCACHE_ISPEC TRITONSERVER_Error* TRITONCACHE_CacheFinalize(
229+
TRITONCACHE_Cache* cache);
230+
231+
/// Inserts entry into cache at specified key. Typically this will fail
232+
/// if the key already exists, but a cache implementation may decide to allow
233+
/// overwriting entries for existing keys.
234+
///
235+
/// This function is required to be implemented by the cache.
236+
///
237+
/// This API is implemented by the user-provided cache implementation,
238+
/// so specific details will be found within the cache implementation.
239+
///
240+
/// \param cache The object that is used to communicate with the cache
241+
/// implementation through a shared library.
242+
/// \param key The key used to access the cache. Generally, this is some
243+
/// unique value or hash representing the entry to avoid collisions.
244+
/// \param entry The entry to be inserted into the cache.
245+
/// \param allocator TritonCacheAllocator that is used to copy data directly
246+
/// into cache-provided buffers to avoid intermediate copies.
247+
/// The cache implementation expects that the entry will hold
248+
/// the requested buffer sizes. Then the cache implementation
249+
/// is expected to set the buffer addresses in the entry to
250+
/// the cache-allocated buffers through the
251+
/// TRITONCACHE_CacheEntrySetBuffer API.
252+
/// \return a TRITONSERVER_Error indicating success or failure.
253+
/// Specific errors will be up to the cache implementation, but general
254+
/// error best practices that should be followed are as follows:
255+
/// - TRITONSERVER_ERROR_INVALID_ARG
256+
/// - bad argument passed, nullptr, etc.
257+
/// - TRITONSERVER_ERROR_ALREADY_EXISTS
258+
/// - key already exists and will not be inserted again
259+
/// - TRITONSERVER_ERROR_INTERNAL
260+
/// - internal errors
261+
/// - nullptr
262+
/// - success
263+
TRITONCACHE_ISPEC TRITONSERVER_Error* TRITONCACHE_CacheInsert(
264+
TRITONCACHE_Cache* cache, const char* key, TRITONCACHE_CacheEntry* entry,
265+
TRITONCACHE_Allocator* allocator);
266+
267+
/// Retrieves entry from cache at specified key, if key exists.
268+
///
269+
/// This function is required to be implemented by the cache.
270+
///
271+
/// This API is implemented by the user-provided cache implementation,
272+
/// so specific details will be found within the cache implementation.
273+
///
274+
/// \param cache The object that is used to communicate with the cache
275+
/// implementation through a shared library.
276+
/// \param key The key used to access the cache. Generally, this is some
277+
/// unique value or hash representing the entry to avoid collisions.
278+
/// \param entry The entry to be retrieved from the cache.
279+
/// \param allocator TritonCacheAllocator that is used to copy cache data into
280+
/// user provided buffers to avoid intermediate copies.
281+
/// \return a TRITONSERVER_Error indicating success or failure.
282+
/// Specific errors will be up to the cache implementation, but general
283+
/// error best practices that should be followed are as follows:
284+
/// - TRITONSERVER_ERROR_INVALID_ARG
285+
/// - bad argument passed, nullptr, etc.
286+
/// - TRITONSERVER_ERROR_NOT_FOUND
287+
/// - key not found in cache
288+
/// - TRITONSERVER_ERROR_INTERNAL
289+
/// - other internal errors
290+
/// - nullptr
291+
/// - success
292+
TRITONCACHE_ISPEC TRITONSERVER_Error* TRITONCACHE_CacheLookup(
293+
TRITONCACHE_Cache* cache, const char* key, TRITONCACHE_CacheEntry* entry,
294+
TRITONCACHE_Allocator* allocator);
295+
296+
#ifdef __cplusplus
297+
} // extern C
298+
#endif

0 commit comments

Comments
 (0)