Skip to content

Commit 1b8b211

Browse files
committed
mamba : in comments, properly refer to KV cells instead of slots
1 parent 5acf897 commit 1b8b211

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

llama.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1802,7 +1802,7 @@ struct llama_kv_cell {
18021802
struct llama_kv_cache {
18031803
bool has_shift = false;
18041804
bool do_defrag = false;
1805-
// with Mamba, a slot can hold the state for more than one past token
1805+
// with Mamba, a cell can hold the state for more than one past token
18061806
bool unlimited = false;
18071807

18081808
// Note: The value of head isn't only used to optimize searching
@@ -2066,7 +2066,7 @@ static bool llama_kv_cache_init(
20662066

20672067
cache.has_shift = false;
20682068

2069-
// for now, only Mamba can hold state for more than one past token per slot
2069+
// for now, only Mamba can hold state for more than one past token per cell
20702070
cache.unlimited = model.arch == LLM_ARCH_MAMBA;
20712071

20722072
cache.head = 0;
@@ -2325,7 +2325,7 @@ static void llama_kv_cache_seq_cp(
23252325
cache.cells[seq_id_dst].delta = seq_id_src;
23262326
// NOTE: a sequence can't have multiple sources, but can have multiple destinations.
23272327
// For compatibility with the other KV cache API functions,
2328-
// the seq_id(s) of a slot suggests an intent to "copy to" those id(s),
2328+
// the seq_id(s) of a cell suggests an intent to "copy to" those id(s),
23292329
// so that when a sequence is copied, it can initially be found from the source cell.
23302330
cache.cells[seq_id_src].seq_id.insert(seq_id_dst);
23312331
// prevent the destination from getting cleared
@@ -12481,10 +12481,10 @@ struct llama_context * llama_new_context_with_model(
1248112481
ggml_type type_k = params.type_k;
1248212482
ggml_type type_v = params.type_v;
1248312483

12484-
// Mamba only needs a constant number of KV cache slots per sequence
12484+
// Mamba only needs a constant number of KV cache cells per sequence
1248512485
if (model->arch == LLM_ARCH_MAMBA) {
12486-
// Mamba needs as many slots as there are distinct sequences processed at the same time
12487-
// The extra slot allows dedicating a sequence id to the system prompt
12486+
// Mamba needs as many KV cells as there are sequences kept at any time
12487+
// The extra cell allows dedicating a sequence id to the system prompt
1248812488
// TODO: find a better way to get the max number of parallel sequences
1248912489
kv_size = params.n_parallel + 1;
1249012490
// it's probably best to keep as much precision as possible for the states

0 commit comments

Comments
 (0)