Skip to content

Commit 59a14d4

Browse files
committed
feat: Move layer_filter_cb up to llama_kv_cache
This will be needed by other cache types as well, so centralizing the definition will make it more reusable.

Branch: HybridCache

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
1 parent 5fbbb28 commit 59a14d4

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

src/llama-kv-cache.h

Lines changed: 6 additions & 3 deletions
@@ -19,6 +19,12 @@ struct llama_model;
 struct llama_context;
 
 struct llama_kv_cache : public llama_memory_i {
+
+    // some child types need to perform different caching for each layer, so
+    // this callback can be used to determine which layers a given cache should
+    // be used for
+    using layer_filter_cb = std::function<bool(int32_t il)>;
+
     virtual ~llama_kv_cache() = default;
 
     // call if batch processing fails - restores the cache state
@@ -102,9 +108,6 @@ class llama_kv_cache_unified : public llama_kv_cache {
 public:
     static uint32_t get_padding(const llama_cparams & cparams);
 
-    // this callback is used to filter out layers that should not be included in the cache
-    using layer_filter_cb = std::function<bool(int32_t il)>;
-
     llama_kv_cache_unified(
             const llama_model & model,
             layer_filter_cb && filter,

0 commit comments

Comments (0)