Skip to content

Commit 623954b

Browse files
committed
llama : fix kv_cache restore logic
ggml-ci
1 parent 4fdd6e5 commit 623954b

File tree

2 files changed

+27
-3
lines changed

2 files changed

+27
-3
lines changed

src/llama-kv-cache.cpp

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -449,8 +449,32 @@ void llama_kv_cache_unified::restore() {
449449
return;
450450
}
451451

452+
// TODO: tmp - move to llama_kv_cache_recurrent
453+
if (recurrent) {
454+
seq_rm(-1, -1, -1);
455+
return;
456+
}
457+
458+
uint32_t new_head = size;
459+
452460
for (auto & range : pending.ranges) {
453-
seq_rm(-1, range.p0, range.p1);
461+
for (uint32_t i = range.c0; i < range.c1; ++i) {
462+
cells[i].seq_id.clear();
463+
464+
// keep count of the number of used cells
465+
if (cells[i].pos >= 0) {
466+
used--;
467+
}
468+
469+
cells[i].pos = -1;
470+
cells[i].src = -1;
471+
}
472+
473+
new_head = std::min(new_head, range.c0);
474+
}
475+
476+
if (new_head != size && new_head < head) {
477+
head = new_head;
454478
}
455479
}
456480

src/llama-kv-cache.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,8 @@ class llama_kv_cache_unified : public llama_kv_cache {
140140
// commit/restore cache
141141

142142
struct slot_range {
143-
uint32_t p0 = 0;
144-
uint32_t p1 = 0;
143+
uint32_t c0 = 0; // note: these are cell indices, not sequence positions
144+
uint32_t c1 = 0;
145145
};
146146

147147
struct {

0 commit comments

Comments
 (0)