@@ -25,9 +25,6 @@ llama_memory_recurrent::llama_memory_recurrent(
         uint32_t n_seq_max) : hparams(model.hparams), n_seq_max(n_seq_max) {
     const int32_t n_layer = hparams.n_layer;
 
-    LLAMA_LOG_INFO("%s: mem_size = %u, n_seq_max = %u, type_r = '%s', type_s = '%s', n_layer = %d\n",
-            __func__, mem_size, n_seq_max, ggml_type_name(type_r), ggml_type_name(type_s), n_layer);
-
     head = 0;
     size = mem_size;
     used = 0;
@@ -84,7 +81,7 @@ llama_memory_recurrent::llama_memory_recurrent(
 
         ggml_context * ctx = ctx_for_buft(buft);
         if (!ctx) {
-            throw std::runtime_error("failed to create ggml context for kv cache");
+            throw std::runtime_error("failed to create ggml context for rs cache");
         }
 
         ggml_tensor * r = ggml_new_tensor_1d(ctx, type_r, hparams.n_embd_r()*mem_size);
@@ -102,19 +99,19 @@ llama_memory_recurrent::llama_memory_recurrent(
 
         ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
         if (!buf) {
-            throw std::runtime_error("failed to allocate buffer for kv cache");
+            throw std::runtime_error("failed to allocate buffer for rs cache");
         }
         ggml_backend_buffer_clear(buf, 0);
-        LLAMA_LOG_INFO("%s: %10s KV buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf)/1024.0/1024.0);
+        LLAMA_LOG_INFO("%s: %10s RS buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf)/1024.0/1024.0);
         bufs.emplace_back(buf);
     }
 
     {
         const size_t memory_size_r = size_r_bytes();
         const size_t memory_size_s = size_s_bytes();
 
-        LLAMA_LOG_INFO("%s: KV self size = %7.2f MiB, R (%s): %7.2f MiB, S (%s): %7.2f MiB\n", __func__,
-                (float)(memory_size_r + memory_size_s) / (1024.0f * 1024.0f),
+        LLAMA_LOG_INFO("%s: size = %7.2f MiB (%6u cells, %3d layers, %2u seqs), R (%s): %7.2f MiB, S (%s): %7.2f MiB\n", __func__,
+                (float)(memory_size_r + memory_size_s) / (1024.0f * 1024.0f), mem_size, n_layer, n_seq_max,
                 ggml_type_name(type_r), (float)memory_size_r / (1024.0f * 1024.0f),
                 ggml_type_name(type_s), (float)memory_size_s / (1024.0f * 1024.0f));
     }
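
For reference, a minimal standalone sketch (not part of the commit) that reproduces the new consolidated summary-line format with plain printf and made-up placeholder values, in place of LLAMA_LOG_INFO and the real cache sizes:

```cpp
// Minimal sketch, not part of the commit: prints the new summary line format
// using plain printf and hypothetical placeholder values instead of the real
// LLAMA_LOG_INFO call and the actual recurrent-state cache sizes.
#include <cstdio>

int main() {
    const unsigned mem_size  = 1;      // hypothetical: number of recurrent cells
    const int      n_layer   = 32;     // hypothetical: layer count
    const unsigned n_seq_max = 1;      // hypothetical: max parallel sequences
    const float    size_r    = 0.12f;  // hypothetical: R state size in MiB
    const float    size_s    = 3.25f;  // hypothetical: S state size in MiB

    // same format string as the new LLAMA_LOG_INFO call above
    printf("%s: size = %7.2f MiB (%6u cells, %3d layers, %2u seqs), R (%s): %7.2f MiB, S (%s): %7.2f MiB\n",
           "llama_memory_recurrent", size_r + size_s, mem_size, n_layer, n_seq_max,
           "f32", size_r, "f32", size_s);
    return 0;
}
```

Compiled and run, this prints a line of the form `llama_memory_recurrent: size = 3.37 MiB (1 cells, 32 layers, 1 seqs), R (f32): 0.12 MiB, S (f32): 3.25 MiB` (padded by the width specifiers), i.e. the removed per-field startup log and the old "KV self size" line are folded into this single summary.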