@@ -69,8 +69,8 @@ llama_kv_cache_recurrent::llama_kv_cache_recurrent(
69
69
continue ;
70
70
}
71
71
72
- const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa (i) + hparams.n_embd_k_s ();
73
- const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (i) + hparams.n_embd_v_s ();
72
+ const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa (i) + hparams.n_embd_k_s (i );
73
+ const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (i) + hparams.n_embd_v_s (i );
74
74
75
75
const char * dev_name = " CPU" ;
76
76
@@ -756,7 +756,7 @@ void llama_kv_cache_recurrent::state_write_data(llama_io_write_i & io, const std
756
756
// Iterate and write all the keys first, each row is a cell
757
757
// Get whole range at a time
758
758
for (uint32_t il = 0 ; il < n_layer; ++il) {
759
- const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa (il) + hparams.n_embd_k_s ();
759
+ const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa (il) + hparams.n_embd_k_s (il );
760
760
761
761
// Write key type
762
762
const int32_t k_type_i = (int32_t )k_l[il]->type ;
@@ -776,7 +776,7 @@ void llama_kv_cache_recurrent::state_write_data(llama_io_write_i & io, const std
776
776
777
777
if (!v_trans) {
778
778
for (uint32_t il = 0 ; il < n_layer; ++il) {
779
- const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (il) + hparams.n_embd_v_s ();
779
+ const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (il) + hparams.n_embd_v_s (il );
780
780
781
781
// Write value type
782
782
const int32_t v_type_i = (int32_t )v_l[il]->type ;
@@ -797,7 +797,7 @@ void llama_kv_cache_recurrent::state_write_data(llama_io_write_i & io, const std
797
797
// When v is transposed, we also need the element size and get the element ranges from each row
798
798
const uint32_t kv_size = size;
799
799
for (uint32_t il = 0 ; il < n_layer; ++il) {
800
- const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (il) + hparams.n_embd_v_s ();
800
+ const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (il) + hparams.n_embd_v_s (il );
801
801
802
802
// Write value type
803
803
const int32_t v_type_i = (int32_t )v_l[il]->type ;
@@ -944,7 +944,7 @@ bool llama_kv_cache_recurrent::state_read_data(llama_io_read_i & io, uint32_t ce
944
944
945
945
// For each layer, read the keys for each cell, one row is one cell, read as one contiguous block
946
946
for (uint32_t il = 0 ; il < n_layer; ++il) {
947
- const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa (il) + hparams.n_embd_k_s ();
947
+ const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa (il) + hparams.n_embd_k_s (il );
948
948
949
949
// Read type of key
950
950
int32_t k_type_i_ref;
@@ -972,7 +972,7 @@ bool llama_kv_cache_recurrent::state_read_data(llama_io_read_i & io, uint32_t ce
972
972
973
973
if (!v_trans) {
974
974
for (uint32_t il = 0 ; il < n_layer; ++il) {
975
- const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (il) + hparams.n_embd_v_s ();
975
+ const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (il) + hparams.n_embd_v_s (il );
976
976
977
977
// Read type of value
978
978
int32_t v_type_i_ref;
@@ -1000,7 +1000,7 @@ bool llama_kv_cache_recurrent::state_read_data(llama_io_read_i & io, uint32_t ce
1000
1000
} else {
1001
1001
// For each layer, read the values for each cell (transposed)
1002
1002
for (uint32_t il = 0 ; il < n_layer; ++il) {
1003
- const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (il) + hparams.n_embd_v_s ();
1003
+ const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (il) + hparams.n_embd_v_s (il );
1004
1004
1005
1005
// Read type of value
1006
1006
int32_t v_type_i_ref;
0 commit comments