Skip to content

Commit 59a14d4

Browse files
committed
feat: Move layer_filter_cb up to llama_kv_cache
This will be needed by other cache types as well, so centralizing the definition will make it more reusable.

Branch: HybridCache

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
1 parent 5fbbb28 commit 59a14d4

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

src/llama-kv-cache.h

Lines changed: 6 additions & 3 deletions
@@ -19,6 +19,12 @@ struct llama_model;
 struct llama_context;
 
 struct llama_kv_cache : public llama_memory_i {
+
+    // some child types need to perform different caching for each layer, so
+    // this callback can be used to determine which layers a given cache should
+    // be used for
+    using layer_filter_cb = std::function<bool(int32_t il)>;
+
     virtual ~llama_kv_cache() = default;
 
     // call if batch processing fails - restores the cache state
@@ -102,9 +108,6 @@ class llama_kv_cache_unified : public llama_kv_cache {
 public:
     static uint32_t get_padding(const llama_cparams & cparams);
 
-    // this callback is used to filter out layers that should not be included in the cache
-    using layer_filter_cb = std::function<bool(int32_t il)>;
-
     llama_kv_cache_unified(
             const llama_model & model,
             layer_filter_cb && filter,

0 commit comments

Comments (0)