Commit 2ff3354

memory : fix broken batch splits for recurrent cache
Splits producing more than one ubatch per batch for recurrent models were broken by #14512. This commit fixes that by moving the completeness check to after the ubatch split loop.

1 parent e1a7059

File tree

1 file changed: 6 additions, 2 deletions


src/llama-memory-recurrent.cpp

Lines changed: 6 additions & 2 deletions
@@ -377,14 +377,18 @@ llama_memory_context_ptr llama_memory_recurrent::init_batch(llama_batch_allocr &
                 ubatch = balloc.split_equal(n_ubatch, false);
             }
 
-            if (balloc.get_n_used() < balloc.get_n_tokens()) {
-                // failed to find a suitable split
+            if (ubatch.n_tokens == 0) {
                 break;
             }
 
             ubatches.push_back(std::move(ubatch)); // NOLINT
         }
 
+        if (balloc.get_n_used() < balloc.get_n_tokens()) {
+            // failed to find a suitable split
+            break;
+        }
+
         if (!prepare(ubatches)) {
             break;
         }
