Skip to content

Commit 199d74c

Browse files
committed
defrag : reset head + add comments
ggml-ci
1 parent 503dda2 commit 199d74c

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

src/llama-kv-cache-unified.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,9 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d
449449

450450
cells.mv(i, dinfo.ids[i]);
451451
}
452+
453+
// reset the head so we can find the first free slot during the next ubatch
454+
head = 0;
452455
}
453456

454457
ggml_backend_sched_reset(sched);
@@ -1126,15 +1129,9 @@ llama_kv_cache_unified::defrag_info llama_kv_cache_unified::defrag_prepare(int32
11261129
const uint32_t max_moves = (n_max_nodes - 2*n_layer)/(6*n_layer);
11271130

11281131
// determine which KV cells to move where
1129-
//
1130-
// cell i moves to ids[i]
1131-
//
1132-
// if ids[i] == i || ids[i] == n_kv, then cell i is not moved
1133-
//
11341132
defrag_info res;
11351133
auto & ids = res.ids;
11361134

1137-
ids.clear();
11381135
ids.resize(n_kv, n_kv);
11391136

11401137
for (uint32_t i0 = 0; i0 < n_used; ++i0) {

src/llama-kv-cache-unified.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ class llama_kv_cache_unified : public llama_kv_cache {
2727
using ubatch_heads = std::vector<uint32_t>;
2828

2929
struct defrag_info {
30+
// contains information about which cell moves where:
31+
// - cell i moves to ids[i]
32+
// - if ids[i] == i || ids[i] == ids.size(), then cell i is not moved
3033
std::vector<uint32_t> ids;
3134
};
3235

0 commit comments

Comments
 (0)