fix: Fix shift logic to defer to unified cache

gabe-l-hart · gabe-l-hart · commit 71ce48e523d1 · 2025-06-10T16:21:57.000-06:00
Branch: HybridRecurrentCache

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
diff --git a/src/llama-kv-cache-hybrid-recurrent.cpp b/src/llama-kv-cache-hybrid-recurrent.cpp
@@ -150,8 +150,8 @@ void llama_kv_cache_hybrid_recurrent::defrag_sched(float thold) {
 }
 
 bool llama_kv_cache_hybrid_recurrent::get_can_shift() const {
-    // TODO: Should this return true if the attention cache can shift?
-    return false;
+    // Shifting is trivially supported for recurrent
+    return kv_attn->get_can_shift();
 }
 
 void llama_kv_cache_hybrid_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq_id) const {

Original file line number	Diff line number	Diff line change
`@@ -150,8 +150,8 @@ void llama_kv_cache_hybrid_recurrent::defrag_sched(float thold) {`
`150`	`150`	`}`
`151`	`151`
`152`	`152`	`bool llama_kv_cache_hybrid_recurrent::get_can_shift() const {`
`153`		`- // TODO: Should this return true if the attention cache can shift?`
`154`		`- return false;`
	`153`	`+ // Shifting is trivially supported for recurrent`
	`154`	`+ return kv_attn->get_can_shift();`
`155`	`155`	`}`
`156`	`156`
`157`	`157`	`void llama_kv_cache_hybrid_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq_id) const {`