Commit f3bca29

ggerganov authored and Minh141120 committed
batch : fix check for empty sequences in memory (ggml-org#14364)
* batch : fix check for empty sequences in memory

  ggml-ci

* cont : reuse the var

  ggml-ci
1 parent e7e42a6 commit f3bca29

File tree

1 file changed: +6 -4 lines changed

src/llama-batch.cpp

Lines changed: 6 additions & 4 deletions
@@ -250,19 +250,21 @@ bool llama_batch_allocr::init(
             continue;
         }
 
-        if (memory) {
+        const llama_pos p0 = memory ? memory->seq_pos_max(s) : -1;
+
+        if (p0 >= 0) {
             bool ok = true;
 
             if (batch.token) {
-                if (seq_pos_min(s) != memory->seq_pos_max(s) + 1) {
+                if (seq_pos_min(s) != p0 + 1) {
                     ok = false;
                 }
             } else {
                 assert(batch.embd);
 
                 // for embeddings (typically used as vision input), we allow them to have repeating positions
                 // ref: https://github.com/ggml-org/llama.cpp/issues/13694#issuecomment-2983871762
-                if (seq_pos_min(s) != memory->seq_pos_max(s) && seq_pos_min(s) != memory->seq_pos_max(s) + 1) {
+                if (seq_pos_min(s) != p0 && seq_pos_min(s) != p0 + 1) {
                     ok = false;
                 }
             }
@@ -273,7 +275,7 @@ bool llama_batch_allocr::init(
                 " - the last position stored in the memory module of the context (i.e. the KV cache) for sequence %d is X = %d\n"
                 " - the tokens for sequence %d in the input batch have a starting position of Y = %d\n"
                 " it is required that the sequence positions remain consecutive: Y = X + 1\n",
-                __func__, s, s, memory->seq_pos_max(s), s, seq_pos_min(s));
+                __func__, s, s, p0, s, seq_pos_min(s));
 
             return false;
         }
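
For reference, below is a minimal self-contained sketch of the guard logic after this change (token case only). The memory_stub type, the check_positions helper, and the values in main are hypothetical stand-ins, not the real llama.cpp API; the one assumption carried over from the diff is that seq_pos_max(s) yields -1 for an empty sequence, so the single p0 >= 0 test skips the consecutiveness check both when there is no memory module and when sequence s holds no data — the case the old if (memory) guard did not cover.

// minimal sketch, C++17 — hypothetical stand-ins for the llama.cpp types
#include <cstdint>
#include <cstdio>

using llama_pos = int32_t;

// stand-in for the memory module (e.g. the KV cache)
struct memory_stub {
    llama_pos max_pos; // assumption: -1 means the sequence is empty

    llama_pos seq_pos_max(int /*s*/) const { return max_pos; }
};

// token-batch position check for sequence s, mirroring the fixed guard
bool check_positions(const memory_stub * memory, int s, llama_pos seq_pos_min_s) {
    // -1 covers both "no memory module" and "sequence s is empty",
    // so the consecutiveness check below is skipped in either case
    const llama_pos p0 = memory ? memory->seq_pos_max(s) : -1;

    if (p0 >= 0) {
        // tokens must continue right after the last stored position: Y = X + 1
        if (seq_pos_min_s != p0 + 1) {
            return false;
        }
    }

    return true;
}

int main() {
    memory_stub seq_empty  = { -1 }; // nothing stored for this sequence
    memory_stub seq_filled = { 41 }; // last stored position is 41

    printf("empty,  start  7 -> %d\n", check_positions(&seq_empty,  0,  7)); // 1: check skipped
    printf("filled, start 42 -> %d\n", check_positions(&seq_filled, 0, 42)); // 1: 42 == 41 + 1
    printf("filled, start 50 -> %d\n", check_positions(&seq_filled, 0, 50)); // 0: not consecutive
}

Caching seq_pos_max(s) in p0 also lets the second hunk reuse the same value in the error message instead of calling memory->seq_pos_max(s) again, which is the "reuse the var" part of the commit message.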
