Skip to content

Commit 7ae2932

Browse files
authored
kv-cache : add LLAMA_KV_CACHE_DEBUG environment variable (#14121)
1 parent 1f7d50b commit 7ae2932

File tree

2 files changed

+41
-14
lines changed

2 files changed

+41
-14
lines changed

src/llama-kv-cache-unified.cpp

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,9 @@ llama_kv_cache_unified::llama_kv_cache_unified(
127127
ggml_type_name(type_k), (float)memory_size_k / (1024.0f * 1024.0f),
128128
ggml_type_name(type_v), (float)memory_size_v / (1024.0f * 1024.0f));
129129
}
130+
131+
const char * LLAMA_KV_CACHE_DEBUG = getenv("LLAMA_KV_CACHE_DEBUG");
132+
debug = LLAMA_KV_CACHE_DEBUG ? atoi(LLAMA_KV_CACHE_DEBUG) : 0;
130133
}
131134

132135
void llama_kv_cache_unified::clear(bool data) {
@@ -517,36 +520,58 @@ int32_t llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch) const {
517520
return -1;
518521
}
519522

520-
//#define FIND_SLOT_DEBUG 1
521-
#if FIND_SLOT_DEBUG
522-
LLAMA_LOG_WARN("begin: n = %5d, used = %5d, head = %5d, n_swa = %5d\n", cells.used_max_p1(), cells.get_used(), head, n_swa);
523+
if (debug > 0) {
524+
LLAMA_LOG_CONT("\n");
525+
LLAMA_LOG_DEBUG("%s: n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n", __func__, cells.used_max_p1(), cells.get_used(), head, get_size(), n_swa);
523526

524-
// for debugging
525-
{
526-
std::string ss;
527-
if (n_swa > 0) {
527+
if ((debug == 2 && n_swa > 0) || debug > 2) {
528+
std::string ss;
528529
for (uint32_t i = 0; i < cells.size(); ++i) {
529530
if (cells.is_empty(i)) {
530531
ss += '.';
531532
} else {
532533
ss += std::to_string(cells.seq_get(i));
533534
}
534535
if (i%256 == 255) {
536+
ss += " *";
535537
ss += '\n';
536538
}
537539
}
540+
LLAMA_LOG_DEBUG("\n%s\n", ss.c_str());
538541
}
539-
LLAMA_LOG_WARN("\n%s\n", ss.c_str());
540-
}
541542

542-
for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
543-
if (cells.seq_pos_min(s) < 0) {
544-
continue;
543+
if ((debug == 2 && n_swa > 0) || debug > 2) {
544+
std::string ss;
545+
for (uint32_t i = 0; i < cells.size(); ++i) {
546+
std::string cur;
547+
if (cells.is_empty(i)) {
548+
cur = '.';
549+
} else {
550+
cur = std::to_string(cells.pos_get(i));
551+
}
552+
const int n = cur.size();
553+
for (int j = 0; j < 5 - n; ++j) {
554+
cur += ' ';
555+
}
556+
ss += cur;
557+
if (i%256 == 255) {
558+
ss += " *";
559+
}
560+
if (i%64 == 63) {
561+
ss += '\n';
562+
}
563+
}
564+
LLAMA_LOG_DEBUG("\n%s\n", ss.c_str());
545565
}
546566

547-
LLAMA_LOG_WARN("kv_cells: n_swa = %4d, min[%d] = %5d, max[%d] = %5d\n", n_swa, s, cells.seq_pos_min(s), s, cells.seq_pos_max(s));
567+
for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
568+
if (cells.seq_pos_min(s) < 0) {
569+
continue;
570+
}
571+
572+
LLAMA_LOG_DEBUG("%s: min[%d] = %5d, max[%d] = %5d\n", __func__, s, cells.seq_pos_min(s), s, cells.seq_pos_max(s));
573+
}
548574
}
549-
#endif
550575

551576
uint32_t n_tested = 0;
552577

src/llama-kv-cache-unified.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@ class llama_kv_cache_unified : public llama_memory_i {
158158
// SWA
159159
const uint32_t n_swa = 0;
160160

161+
int debug = 0;
162+
161163
const llama_swa_type swa_type = LLAMA_SWA_TYPE_NONE;
162164

163165
std::vector<ggml_context_ptr> ctxs;

0 commit comments

Comments (0)