2 files changed: +27 -3 lines changed
lines changed Original file line number Diff line number Diff line change @@ -449,8 +449,32 @@ void llama_kv_cache_unified::restore() {
449
449
return ;
450
450
}
451
451
452
+ // TODO: temporary - move this recurrent-cache branch to llama_kv_cache_recurrent
453
+ if (recurrent) {
454
+ seq_rm (-1 , -1 , -1 );
455
+ return ;
456
+ }
457
+
458
+ uint32_t new_head = size;
459
+
452
460
for (auto & range : pending.ranges ) {
453
- seq_rm (-1 , range.p0 , range.p1 );
461
+ for (uint32_t i = range.c0 ; i < range.c1 ; ++i) {
462
+ cells[i].seq_id .clear ();
463
+
464
+ // keep count of the number of used cells
465
+ if (cells[i].pos >= 0 ) {
466
+ used--;
467
+ }
468
+
469
+ cells[i].pos = -1 ;
470
+ cells[i].src = -1 ;
471
+ }
472
+
473
+ new_head = std::min (new_head, range.c0 );
474
+ }
475
+
476
+ if (new_head != size && new_head < head) {
477
+ head = new_head;
454
478
}
455
479
}
456
480
Original file line number Diff line number Diff line change @@ -140,8 +140,8 @@ class llama_kv_cache_unified : public llama_kv_cache {
140
140
// commit/restore cache
141
141
142
142
// Range of cache cells tracked for commit/restore; restore() clears cells[i] for c0 <= i < c1.
struct slot_range {
143
- uint32_t p0 = 0 ;
144
- uint32_t p1 = 0 ;
143
+ uint32_t c0 = 0 ; // note: these are cell indices, not sequence positions
144
+ uint32_t c1 = 0 ; // one past the last cell index (restore() loops while i < c1)
145
145
};
146
146
147
147
struct {
You can’t perform that action at this time.
0 commit comments