Skip to content

Commit 1c0d991

Browse files
committed
kv-cache : add comments
ggml-ci
1 parent d82a2db commit 1c0d991

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

src/llama-kv-cache-unified.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,8 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_
626 626

627 627
uint32_t n_tested = 0;
628 628

629+
// for continuous slots, we test that all tokens in the ubatch fit, starting from the current head
630+
// for non-continuous slots, we test the tokens one by one
629 631
const uint32_t n_test = cont ? n_tokens : 1;
630 632

631 633
slot_info res;
@@ -853,7 +855,7 @@ ggml_tensor * llama_kv_cache_unified::cpy_v(ggml_context * ctx, ggml_tensor * v_
853 855
v_cur = ggml_permute(ctx, ggml_reshape_3d(ctx, v_cur, v_cur->ne[0], 1, v_cur->ne[1]), 2, 0, 1, 3);
854 856

855 857
// note: we can be more explicit here at the cost of extra cont
856-
// however, above we take advantage that a row of single element is always contiguous regardless of the row stride
858+
// however, above we take advantage that a row of single element is always continuous regardless of the row stride
857 859
//v_cur = ggml_transpose(ctx, v_cur);
858 860
//v_cur = ggml_cont_3d(ctx, v_cur, 1, v_cur->ne[0], v_cur->ne[1]);
859 861

@@ -1868,6 +1870,7 @@ llama_kv_cache_unified_context::llama_kv_cache_unified_context(
1868 1870
llama_kv_cache_unified * kv) : status(LLAMA_MEMORY_STATUS_SUCCESS), kv(kv) {
1869 1871
n_kv = kv->get_size();
1870 1872

1873+
// create a dummy slot info - the actual data is irrelevant. we just need to build the graph
1871 1874
sinfos.resize(1);
1872 1875
sinfos[0].idxs.resize(1);
1873 1876
sinfos[0].idxs[0] = 0;

src/llama-kv-cache-unified.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ class llama_kv_cache_unified : public llama_memory_i {
35 35
std::vector<uint32_t> ids;
36 36
};
37 37

38+
// for each ubatch, create a slot_info that contains information about where the ubatch should be inserted in the
39+
// KV cells. for example, cell indices for each token, such that: token[i] -> goes to cells[idxs[i]]
38 40
struct slot_info {
39 41
// data for ggml_set_rows
40 42
using idx_vec_t = std::vector<uint32_t>;

0 commit comments

Comments
 (0)