@@ -127,6 +127,9 @@ llama_kv_cache_unified::llama_kv_cache_unified(
127
127
ggml_type_name (type_k), (float )memory_size_k / (1024 .0f * 1024 .0f ),
128
128
ggml_type_name (type_v), (float )memory_size_v / (1024 .0f * 1024 .0f ));
129
129
}
130
+
131
+ const char * LLAMA_KV_CACHE_DEBUG = getenv (" LLAMA_KV_CACHE_DEBUG" );
132
+ debug = LLAMA_KV_CACHE_DEBUG ? atoi (LLAMA_KV_CACHE_DEBUG) : 0 ;
130
133
}
131
134
132
135
void llama_kv_cache_unified::clear (bool data) {
@@ -517,36 +520,58 @@ int32_t llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch) const {
517
520
return -1 ;
518
521
}
519
522
520
- // #define FIND_SLOT_DEBUG 1
521
- # if FIND_SLOT_DEBUG
522
- LLAMA_LOG_WARN ( " begin : n = %5d, used = %5d, head = %5d, n_swa = %5d\n " , cells.used_max_p1 (), cells.get_used (), head, n_swa);
523
+ if (debug > 0 ) {
524
+ LLAMA_LOG_CONT ( " \n " );
525
+ LLAMA_LOG_DEBUG ( " %s : n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n " , __func__, cells.used_max_p1 (), cells.get_used (), head, get_size () , n_swa);
523
526
524
- // for debugging
525
- {
526
- std::string ss;
527
- if (n_swa > 0 ) {
527
+ if ((debug == 2 && n_swa > 0 ) || debug > 2 ) {
528
+ std::string ss;
528
529
for (uint32_t i = 0 ; i < cells.size (); ++i) {
529
530
if (cells.is_empty (i)) {
530
531
ss += ' .' ;
531
532
} else {
532
533
ss += std::to_string (cells.seq_get (i));
533
534
}
534
535
if (i%256 == 255 ) {
536
+ ss += " *" ;
535
537
ss += ' \n ' ;
536
538
}
537
539
}
540
+ LLAMA_LOG_DEBUG (" \n %s\n " , ss.c_str ());
538
541
}
539
- LLAMA_LOG_WARN (" \n %s\n " , ss.c_str ());
540
- }
541
542
542
- for (int s = 0 ; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
543
- if (cells.seq_pos_min (s) < 0 ) {
544
- continue ;
543
+ if ((debug == 2 && n_swa > 0 ) || debug > 2 ) {
544
+ std::string ss;
545
+ for (uint32_t i = 0 ; i < cells.size (); ++i) {
546
+ std::string cur;
547
+ if (cells.is_empty (i)) {
548
+ cur = ' .' ;
549
+ } else {
550
+ cur = std::to_string (cells.pos_get (i));
551
+ }
552
+ const int n = cur.size ();
553
+ for (int j = 0 ; j < 5 - n; ++j) {
554
+ cur += ' ' ;
555
+ }
556
+ ss += cur;
557
+ if (i%256 == 255 ) {
558
+ ss += " *" ;
559
+ }
560
+ if (i%64 == 63 ) {
561
+ ss += ' \n ' ;
562
+ }
563
+ }
564
+ LLAMA_LOG_DEBUG (" \n %s\n " , ss.c_str ());
545
565
}
546
566
547
- LLAMA_LOG_WARN (" kv_cells: n_swa = %4d, min[%d] = %5d, max[%d] = %5d\n " , n_swa, s, cells.seq_pos_min (s), s, cells.seq_pos_max (s));
567
+ for (int s = 0 ; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
568
+ if (cells.seq_pos_min (s) < 0 ) {
569
+ continue ;
570
+ }
571
+
572
+ LLAMA_LOG_DEBUG (" %s: min[%d] = %5d, max[%d] = %5d\n " , __func__, s, cells.seq_pos_min (s), s, cells.seq_pos_max (s));
573
+ }
548
574
}
549
- #endif
550
575
551
576
uint32_t n_tested = 0 ;
552
577
0 commit comments