context : fix recurrent reserve

ggerganov · ggerganov · commit a5a85a3bc0c4 · 2025-02-24T08:59:12.000+02:00
ggml-ci
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
@@ -3883,6 +3883,11 @@ llama_context_recurrent::llama_context_recurrent(
 llama_context_recurrent::~llama_context_recurrent() = default;
 
 void llama_context_recurrent::reserve() {
+    // simulate a full KV cache: set kv_self.n to kv_self.size before calling the base-class llama_context::reserve()
+    kv_self.n = kv_self.size;
+
+    LLAMA_LOG_DEBUG("%s: kv_self.n = %u\n", __func__, kv_self.n);
+
     // TODO: implement recurrent-specific reserve logic
     llama_context::reserve();
 }
diff --git a/src/llama-context.h b/src/llama-context.h
@@ -447,6 +447,7 @@ class llama_context_kv_self : public llama_context {
         ggml_tensor * self_k_shift;         // I32 [kv_size]
     } inp;
 
+protected:
     //
     // graph
     //
@@ -570,6 +571,7 @@ class llama_context_recurrent : public llama_context {
         ggml_tensor * s_mask; // F32 [1, n_kv]
     } inp;
 
+protected:
     //
     // graph
     //