Skip to content

Commit 1b4fbc8

Browse files
committed
kv-cache : add comments
1 parent cbe971a commit 1b4fbc8

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

src/llama-kv-cache-unified.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -267,6 +267,9 @@ void llama_kv_cache_unified::seq_cp(llama_seq_id seq_id_src, llama_seq_id seq_id
267267
const auto s1 = seq_to_stream[seq_id_dst];
268268

269269
if (s0 == s1) {
270+
// since both sequences are in the same stream, no data copy is necessary
271+
// we just have to update the cells' metadata
272+
270273
auto & cells = v_cells[s0];
271274

272275
if (seq_id_src == seq_id_dst) {
@@ -294,6 +297,8 @@ void llama_kv_cache_unified::seq_cp(llama_seq_id seq_id_src, llama_seq_id seq_id
294297
return;
295298
}
296299

300+
// cross-stream sequence copies require copying the actual buffer data
301+
297302
bool is_full = true;
298303

299304
if (p0 > 0 && p0 + 1 < (int) get_size()) {
@@ -313,8 +318,6 @@ void llama_kv_cache_unified::seq_cp(llama_seq_id seq_id_src, llama_seq_id seq_id
313318

314319
ggml_backend_tensor_copy(layer.k_stream[s0], layer.k_stream[s1]);
315320
ggml_backend_tensor_copy(layer.v_stream[s0], layer.v_stream[s1]);
316-
317-
// TODO: do we need synchronization here?
318321
}
319322

320323
v_cells[s1].reset();

0 commit comments

Comments
 (0)