Commit 230314b (parent: 7a1a22c)

feat: Move layer_filter_cb up to llama_kv_cache

This will be needed by other cache types as well, so centralizing the definition will make it more reusable.

Branch: HybridCache
Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>

File tree: 2 files changed (+6, -3 lines)
src/llama-kv-cache-unified.h

Lines changed: 0 additions & 3 deletions

@@ -21,9 +21,6 @@ class llama_kv_cache_unified : public llama_kv_cache {
 public:
     static uint32_t get_padding(const llama_cparams & cparams);
 
-    // this callback is used to filter out layers that should not be included in the cache
-    using layer_filter_cb = std::function<bool(int32_t il)>;
-
     llama_kv_cache_unified(
             const llama_model & model,
                 layer_filter_cb && filter,

src/llama-kv-cache.h

Lines changed: 6 additions & 0 deletions

@@ -4,7 +4,13 @@
 #include "llama-io.h"
 #include "llama-memory.h"
 
+#include <functional>
+
 struct llama_kv_cache : public llama_memory_i {
+
+    // this callback is used to filter out layers that should not be included in the cache
+    using layer_filter_cb = std::function<bool(int32_t il)>;
+
     virtual ~llama_kv_cache() = default;
 
     // split the input batch into a set of ubatches and verify that they can fit into the cache
