Skip to content

Commit 8db910e

Browse files
committed
fix: Fix resize vs reserve and skip null tensors in size computation
https://github.com/ggml-org/llama.cpp/pull/13979/files#r2149469788 Branch: HybridRecurrentCache Signed-off-by: Gabe Goodhart <ghart@us.ibm.com> Co-Authored-By: @younesbelkada
1 parent bdfdbbf commit 8db910e

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

src/llama-kv-cache-recurrent.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ llama_kv_cache_recurrent::llama_kv_cache_recurrent(
         return it->second;
     };

-    k_l.reserve(n_layer);
-    v_l.reserve(n_layer);
+    k_l.resize(n_layer);
+    v_l.resize(n_layer);

     for (int i = 0; i < n_layer; i++) {
         if (filter && !filter(i)) {
@@ -647,7 +647,9 @@ size_t llama_kv_cache_recurrent::size_k_bytes() const {
     size_t size_k_bytes = 0;

     for (const auto & k : k_l) {
-        size_k_bytes += ggml_nbytes(k);
+        if (k != nullptr) {
+            size_k_bytes += ggml_nbytes(k);
+        }
     }

     return size_k_bytes;
@@ -657,7 +659,9 @@ size_t llama_kv_cache_recurrent::size_v_bytes() const {
    size_t size_v_bytes = 0;

    for (const auto & v : v_l) {
-        size_v_bytes += ggml_nbytes(v);
+        if (v != nullptr) {
+            size_v_bytes += ggml_nbytes(v);
+        }
    }

    return size_v_bytes;

0 commit comments

Comments
 (0)