Skip to content

Commit aad7450

Browse files
committed
Display detailed smartcontext info during inference
Fix compilation of the added smartcontext info output
1 parent b01e9ec commit aad7450

File tree

1 file changed

+23
-21
lines changed

1 file changed

+23
-21
lines changed

model_adapter.cpp

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
#include <chrono>
1616

17+
//static int debugmode;
18+
1719
static auto bench_timer = std::chrono::high_resolution_clock().now();
1820

1921
void timer_start()
@@ -427,10 +429,10 @@ void print_tok_vec(std::vector<float> &embd)
427429
const float SCTruncationRatio = 0.5; //ratio for how many tokens to fast forward
428430
const int SCTokThreshold = 32 + (nctx*0.05); //how many tokens of similarity triggers smartcontext
429431

430-
// printf("\nORIGINAL CTX:\n");
431-
// print_tok_vec(current_context_tokens);
432-
// printf("\nORIGINAL EMBD:\n");
433-
// print_tok_vec(embd_inp);
432+
//printf("\nORIGINAL CTX:\n");
433+
//print_tok_vec(current_context_tokens);
434+
//printf("\nORIGINAL EMBD:\n");
435+
//print_tok_vec(embd_inp);
434436

435437
//fast forward the past based on identical tokens, stop once a divergence is noted
436438
int embd_inp_len = embd_inp.size();
@@ -479,9 +481,9 @@ void print_tok_vec(std::vector<float> &embd)
479481
embd_inp.erase(embd_inp.begin(), embd_inp.begin() + n_past);
480482
embd_inp_len = embd_inp.size();
481483

482-
// printf("\nconds: %d %d %d\n",current_context_tokens.size() >= nctx*0.8
483-
// embd_inp_len >= nctx*0.6 ,current_context_tokens.size() - n_past > nctx*0.5);
484-
// printf("csiz:%d par:%d eilen:%d np:%d",current_context_tokens.size(), (int)(nctx*0.8),embd_inp_len,n_past);
484+
printf("\nconds: %d %d %d\n",current_context_tokens.size() >= nctx*0.8);
485+
embd_inp_len >= nctx*0.6 ,current_context_tokens.size() - n_past > nctx*0.5;
486+
printf("csiz:%d par:%d eilen:%d np:%d",current_context_tokens.size(), (int)(nctx*0.8),embd_inp_len,n_past);
485487
}
486488

487489
//smart context mode, detect if we have a shifted context at max length
@@ -490,14 +492,14 @@ void print_tok_vec(std::vector<float> &embd)
490492

491493
if (fastforwardok && useSmartContext && smartcontext.size() > 0 && embd_inp_len >= SCInpLenThreshold)
492494
{
493-
// printf("curfullcontext:\n");
494-
// print_tok_vec(current_context_tokens);
495+
//printf("curfullcontext:\n");
496+
//print_tok_vec(current_context_tokens);
495497

496498
//see if smartcontext is still usable
497-
// printf("smartctx:\n");
498-
// print_tok_vec(smartcontext);
499-
// printf("embinp:\n");
500-
// print_tok_vec(embd_inp);
499+
//printf("smartctx:\n");
500+
//if(debugmode==1) {print_tok_vec(smartcontext);}
501+
//printf("embinp:\n");
502+
//if(debugmode==1) {print_tok_vec(embd_inp);}
501503

502504
auto shared = LongestCommonSubseq(smartcontext, embd_inp);
503505
if (shared.size() > SCTokThreshold && ArrStartWith(smartcontext, shared)) //at least 32 tokens in common
@@ -508,8 +510,8 @@ void print_tok_vec(std::vector<float> &embd)
508510
auto trimmed = std::vector<int>(embd_inp.begin() + found, embd_inp.end());
509511
embd_inp = trimmed;
510512
embd_inp_len = embd_inp.size();
511-
// printf("trimmed:\n");
512-
// print_tok_vec(embd_inp,&vocab.id_to_token);
513+
//printf("trimmed:\n");
514+
//if(debugmode==1) {print_tok_vec(embd_inp);}
513515
printf("\n[Reusing Smart Context: %d allowance remaining]", found);
514516

515517
int old_n_past = n_past;
@@ -521,7 +523,7 @@ void print_tok_vec(std::vector<float> &embd)
521523

522524
for (int i = n_past; i < current_context_tokens.size(); ++i)
523525
{
524-
// printf("\n%s and %s\n",vocab.id_to_token[current_context_tokens[i]].c_str(), vocab.id_to_token[embd_inp[i-offset_fix]].c_str());
526+
printf("\n%s and %s\n",current_context_tokens[i], embd_inp[i-offset_fix]);
525527
if (current_context_tokens[i] == embd_inp[i-offset_fix])
526528
{
527529
n_past += 1;
@@ -539,8 +541,8 @@ void print_tok_vec(std::vector<float> &embd)
539541

540542
last_n_tokens.erase(last_n_tokens.begin(), last_n_tokens.begin() + (n_past-old_n_past));
541543
embd_inp.erase(embd_inp.begin(), embd_inp.begin() + (n_past-old_n_past));
542-
// printf("np:%d newembinp: \n",n_past);
543-
// print_tok_vec(embd_inp);
544+
//printf("np:%d newembinp: \n",n_past);
545+
//if (debugmode==1) {print_tok_vec(embd_inp);}
544546
}else{
545547
smartcontext.clear();
546548
}
@@ -563,9 +565,9 @@ void print_tok_vec(std::vector<float> &embd)
563565
//determine longest common substring after removing start part
564566
int shiftamt = embd_inp.size() * SCTruncationRatio;
565567
smartcontext = std::vector<int>(embd_inp.begin() + shiftamt, embd_inp.end());
566-
printf("\n[New Smart Context Triggered! Buffered Token Allowance: %d]",shiftamt);
567-
// printf("smartctx:\n");
568-
// print_tok_vec(smartcontext,&vocab.id_to_token);
568+
printf("\n[New Smart Context Triggered! Buffered Token Allowance: %d]",shiftamt);
569+
//printf("smartctx:\n");
570+
//if(debugmode==1) {print_tok_vec(smartcontext);}
569571
embd_inp = smartcontext;
570572
//if max ctx length is exceeded, chop the prompt in half after the start part, and memorize it. The memorized part becomes LCS marker.
571573
//when a future prompt comes in, find the LCS again. If LCS > a length and LCS starts with memorized LCS

0 commit comments

Comments
 (0)