14
14
15
15
#include < chrono>
16
16
17
+ // static int debugmode;
18
+
17
19
static auto bench_timer = std::chrono::high_resolution_clock().now();
18
20
19
21
void timer_start ()
@@ -427,10 +429,10 @@ void print_tok_vec(std::vector<float> &embd)
427
429
const float SCTruncationRatio = 0.5 ; // ratio for how many tokens to fast forward
428
430
const int SCTokThreshold = 32 + (nctx*0.05 ); // how many tokens of similarity triggers smartcontext
429
431
430
- // printf("\nORIGINAL CTX:\n");
431
- // print_tok_vec(current_context_tokens);
432
- // printf("\nORIGINAL EMBD:\n");
433
- // print_tok_vec(embd_inp);
432
+ // printf("\nORIGINAL CTX:\n");
433
+ // print_tok_vec(current_context_tokens);
434
+ // printf("\nORIGINAL EMBD:\n");
435
+ // print_tok_vec(embd_inp);
434
436
435
437
// fast forward the past based on identical tokens, stop once a divergence is noted
436
438
int embd_inp_len = embd_inp.size ();
@@ -479,9 +481,9 @@ void print_tok_vec(std::vector<float> &embd)
479
481
embd_inp.erase (embd_inp.begin (), embd_inp.begin () + n_past);
480
482
embd_inp_len = embd_inp.size ();
481
483
482
- // printf("\nconds: %d %d %d\n",current_context_tokens.size() >= nctx*0.8
483
- // embd_inp_len >= nctx*0.6 ,current_context_tokens.size() - n_past > nctx*0.5) ;
484
- // printf("csiz:%d par:%d eilen:%d np:%d",current_context_tokens.size(), (int)(nctx*0.8),embd_inp_len,n_past);
484
+ printf (" \n conds: %d %d %d\n " ,current_context_tokens.size () >= nctx*0.8 );
485
+ embd_inp_len >= nctx*0.6 ,current_context_tokens.size () - n_past > nctx*0.5 ;
486
+ printf (" csiz:%d par:%d eilen:%d np:%d" ,current_context_tokens.size (), (int )(nctx*0.8 ),embd_inp_len,n_past);
485
487
}
486
488
487
489
// smart context mode, detect if we have a shifted context at max length
@@ -490,14 +492,14 @@ void print_tok_vec(std::vector<float> &embd)
490
492
491
493
if (fastforwardok && useSmartContext && smartcontext.size () > 0 && embd_inp_len >= SCInpLenThreshold)
492
494
{
493
- // printf("curfullcontext:\n");
494
- // print_tok_vec(current_context_tokens);
495
+ // printf("curfullcontext:\n");
496
+ // print_tok_vec(current_context_tokens);
495
497
496
498
// see if smartcontext is still usable
497
- // printf("smartctx:\n");
498
- // print_tok_vec(smartcontext);
499
- // printf("embinp:\n");
500
- // print_tok_vec(embd_inp);
499
+ // printf("smartctx:\n");
500
+ // if(debugmode==1) { print_tok_vec(smartcontext);}
501
+ // printf("embinp:\n");
502
+ // if(debugmode==1) { print_tok_vec(embd_inp);}
501
503
502
504
auto shared = LongestCommonSubseq (smartcontext, embd_inp);
503
505
if (shared.size () > SCTokThreshold && ArrStartWith (smartcontext, shared)) // at least 32 tokens in common
@@ -508,8 +510,8 @@ void print_tok_vec(std::vector<float> &embd)
508
510
auto trimmed = std::vector<int >(embd_inp.begin () + found, embd_inp.end ());
509
511
embd_inp = trimmed;
510
512
embd_inp_len = embd_inp.size ();
511
- // printf("trimmed:\n");
512
- // print_tok_vec(embd_inp,&vocab.id_to_token);
513
+ // printf("trimmed:\n");
514
+ // if(debugmode==1) { print_tok_vec(embd_inp);}
513
515
printf (" \n [Reusing Smart Context: %d allowance remaining]" , found);
514
516
515
517
int old_n_past = n_past;
@@ -521,7 +523,7 @@ void print_tok_vec(std::vector<float> &embd)
521
523
522
524
for (int i = n_past; i < current_context_tokens.size (); ++i)
523
525
{
524
- // printf("\n%s and %s\n",vocab.id_to_token[ current_context_tokens[i]].c_str(), vocab.id_to_token[ embd_inp[i-offset_fix]].c_str() );
526
+ printf (" \n %s and %s\n " ,current_context_tokens[i], embd_inp[i-offset_fix]);
525
527
if (current_context_tokens[i] == embd_inp[i-offset_fix])
526
528
{
527
529
n_past += 1 ;
@@ -539,8 +541,8 @@ void print_tok_vec(std::vector<float> &embd)
539
541
540
542
last_n_tokens.erase (last_n_tokens.begin (), last_n_tokens.begin () + (n_past-old_n_past));
541
543
embd_inp.erase (embd_inp.begin (), embd_inp.begin () + (n_past-old_n_past));
542
- // printf("np:%d newembinp: \n",n_past);
543
- // print_tok_vec(embd_inp);
544
+ // printf("np:%d newembinp: \n",n_past);
545
+ // if (debugmode==1) { print_tok_vec(embd_inp);}
544
546
}else {
545
547
smartcontext.clear ();
546
548
}
@@ -563,9 +565,9 @@ void print_tok_vec(std::vector<float> &embd)
563
565
// determine longest common substring after removing start part
564
566
int shiftamt = embd_inp.size () * SCTruncationRatio;
565
567
smartcontext = std::vector<int >(embd_inp.begin () + shiftamt, embd_inp.end ());
566
- printf (" \n [New Smart Context Triggered! Buffered Token Allowance: %d]" ,shiftamt);
567
- // printf("smartctx:\n");
568
- // print_tok_vec(smartcontext,&vocab.id_to_token);
568
+ printf (" \n [New Smart Context Triggered! Buffered Token Allowance: %d]" ,shiftamt);
569
+ // printf("smartctx:\n");
570
+ // if(debugmode==1) { print_tok_vec(smartcontext);}
569
571
embd_inp = smartcontext;
570
572
// if max ctx length is exceeded, chop the prompt in half after the start part, and memorize it. The memorized part becomes LCS marker.
571
573
// when a future prompt comes in, find the LCS again. If LCS > a length and LCS starts with memorized LCS
0 commit comments