Skip to content

Commit 452207f

Browse files
committed
memory : avoid referring to KV in recurrent cache logs
1 parent 7f3955a commit 452207f

File tree

1 file changed

+5
-8
lines changed

1 file changed

+5
-8
lines changed

src/llama-memory-recurrent.cpp

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -25,9 +25,6 @@ llama_memory_recurrent::llama_memory_recurrent(
2525
uint32_t n_seq_max) : hparams(model.hparams), n_seq_max(n_seq_max) {
2626
const int32_t n_layer = hparams.n_layer;
2727

28-
LLAMA_LOG_INFO("%s: mem_size = %u, n_seq_max = %u, type_r = '%s', type_s = '%s', n_layer = %d\n",
29-
__func__, mem_size, n_seq_max, ggml_type_name(type_r), ggml_type_name(type_s), n_layer);
30-
3128
head = 0;
3229
size = mem_size;
3330
used = 0;
@@ -84,7 +81,7 @@ llama_memory_recurrent::llama_memory_recurrent(
8481

8582
ggml_context * ctx = ctx_for_buft(buft);
8683
if (!ctx) {
87-
throw std::runtime_error("failed to create ggml context for kv cache");
84+
throw std::runtime_error("failed to create ggml context for rs cache");
8885
}
8986

9087
ggml_tensor * r = ggml_new_tensor_1d(ctx, type_r, hparams.n_embd_r()*mem_size);
@@ -102,19 +99,19 @@ llama_memory_recurrent::llama_memory_recurrent(
10299

103100
ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
104101
if (!buf) {
105-
throw std::runtime_error("failed to allocate buffer for kv cache");
102+
throw std::runtime_error("failed to allocate buffer for rs cache");
106103
}
107104
ggml_backend_buffer_clear(buf, 0);
108-
LLAMA_LOG_INFO("%s: %10s KV buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf)/1024.0/1024.0);
105+
LLAMA_LOG_INFO("%s: %10s RS buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf)/1024.0/1024.0);
109106
bufs.emplace_back(buf);
110107
}
111108

112109
{
113110
const size_t memory_size_r = size_r_bytes();
114111
const size_t memory_size_s = size_s_bytes();
115112

116-
LLAMA_LOG_INFO("%s: KV self size = %7.2f MiB, R (%s): %7.2f MiB, S (%s): %7.2f MiB\n", __func__,
117-
(float)(memory_size_r + memory_size_s) / (1024.0f * 1024.0f),
113+
LLAMA_LOG_INFO("%s: size = %7.2f MiB (%6u cells, %3d layers, %2u seqs), R (%s): %7.2f MiB, S (%s): %7.2f MiB\n", __func__,
114+
(float)(memory_size_r + memory_size_s) / (1024.0f * 1024.0f), mem_size, n_layer, n_seq_max,
118115
ggml_type_name(type_r), (float)memory_size_r / (1024.0f * 1024.0f),
119116
ggml_type_name(type_s), (float)memory_size_s / (1024.0f * 1024.0f));
120117
}

0 commit comments

Comments (0)