Skip to content

Commit 33134bd

Browse files
ggerganov and Minh141120
authored and committed
memory : correctly handle failure in apply() (ggml-org#14438)
ggml-ci
1 parent 7240127 commit 33134bd

File tree

5 files changed

+15
-4
lines changed

5 files changed

+15
-4
lines changed

src/llama-kv-cache-unified-iswa.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -246,7 +246,7 @@ bool llama_kv_cache_unified_iswa_context::next() {
246246
}
247247

248248
bool llama_kv_cache_unified_iswa_context::apply() {
249-
assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
249+
assert(!llama_memory_status_is_fail(status));
250250

251251
bool res = true;
252252

src/llama-kv-cache-unified.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1785,7 +1785,7 @@ bool llama_kv_cache_unified_context::next() {
17851785
}
17861786

17871787
bool llama_kv_cache_unified_context::apply() {
1788-
assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
1788+
assert(!llama_memory_status_is_fail(status));
17891789

17901790
// no ubatches -> this is a KV cache update
17911791
if (ubatches.empty()) {

src/llama-memory-hybrid.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -218,7 +218,7 @@ bool llama_memory_hybrid_context::next() {
218218
}
219219

220220
bool llama_memory_hybrid_context::apply() {
221-
assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
221+
assert(!llama_memory_status_is_fail(status));
222222

223223
bool res = true;
224224

src/llama-memory-recurrent.cpp

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1071,7 +1071,15 @@ bool llama_memory_recurrent_context::next() {
10711071
}
10721072

10731073
bool llama_memory_recurrent_context::apply() {
1074-
assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
1074+
assert(!llama_memory_status_is_fail(status));
1075+
1076+
// no ubatches -> this is an update
1077+
if (ubatches.empty()) {
1078+
// recurrent cache never performs updates
1079+
assert(status == LLAMA_MEMORY_STATUS_NO_UPDATE);
1080+
1081+
return true;
1082+
}
10751083

10761084
mem->find_slot(ubatches[i_next]);
10771085

src/llama-memory.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,9 @@ enum llama_memory_status {
3232
// useful for implementing hybrid memory types (e.g. iSWA)
3333
llama_memory_status llama_memory_status_combine(llama_memory_status s0, llama_memory_status s1);
3434

35+
// helper function for checking if a memory status indicates a failure
36+
bool llama_memory_status_is_fail(llama_memory_status status);
37+
3538
// the interface for managing the memory context during batch processing
3639
// this interface is implemented per memory type. see:
3740
// - llama_kv_cache_unified_context

0 commit comments

Comments (0)