@@ -427,6 +427,10 @@ void print_tok_vec(std::vector<float> &embd)
427
427
const float SCTruncationRatio = 0.5 ; // ratio for how many tokens to fast forward
428
428
const int SCTokThreshold = 32 + (nctx*0.05 ); // how many tokens of similarity triggers smartcontext
429
429
430
+ // printf("\nORIGINAL CTX:\n");
431
+ // print_tok_vec(current_context_tokens);
432
+ // printf("\nORIGINAL EMBD:\n");
433
+ // print_tok_vec(embd_inp);
430
434
431
435
// fast forward the past based on identical tokens, stop once a divergence is noted
432
436
int embd_inp_len = embd_inp.size ();
@@ -474,6 +478,10 @@ void print_tok_vec(std::vector<float> &embd)
474
478
last_n_tokens.erase (last_n_tokens.begin (), last_n_tokens.begin () + n_past);
475
479
embd_inp.erase (embd_inp.begin (), embd_inp.begin () + n_past);
476
480
embd_inp_len = embd_inp.size ();
481
+
482
+ // printf("\nconds: %d %d %d\n",current_context_tokens.size() >= nctx*0.8
483
+ // embd_inp_len >= nctx*0.6 ,current_context_tokens.size() - n_past > nctx*0.5);
484
+ // printf("csiz:%d par:%d eilen:%d np:%d",current_context_tokens.size(), (int)(nctx*0.8),embd_inp_len,n_past);
477
485
}
478
486
479
487
// smart context mode, detect if we have a shifted context at max length
@@ -482,7 +490,15 @@ void print_tok_vec(std::vector<float> &embd)
482
490
483
491
if (fastforwardok && useSmartContext && smartcontext.size () > 0 && embd_inp_len >= SCInpLenThreshold)
484
492
{
493
+ // printf("curfullcontext:\n");
494
+ // print_tok_vec(current_context_tokens);
495
+
485
496
// see if smartcontext is still usable
497
+ // printf("smartctx:\n");
498
+ // print_tok_vec(smartcontext);
499
+ // printf("embinp:\n");
500
+ // print_tok_vec(embd_inp);
501
+
486
502
auto shared = LongestCommonSubseq (smartcontext, embd_inp);
487
503
if (shared.size () > SCTokThreshold && ArrStartWith (smartcontext, shared)) // at least 32 tokens in common
488
504
{
@@ -492,6 +508,8 @@ void print_tok_vec(std::vector<float> &embd)
492
508
auto trimmed = std::vector<int >(embd_inp.begin () + found, embd_inp.end ());
493
509
embd_inp = trimmed;
494
510
embd_inp_len = embd_inp.size ();
511
+ // printf("trimmed:\n");
512
+ // print_tok_vec(embd_inp,&vocab.id_to_token);
495
513
printf (" \n [Reusing Smart Context: %d allowance remaining]" , found);
496
514
497
515
int old_n_past = n_past;
@@ -503,6 +521,7 @@ void print_tok_vec(std::vector<float> &embd)
503
521
504
522
for (int i = n_past; i < current_context_tokens.size (); ++i)
505
523
{
524
+ // printf("\n%s and %s\n",vocab.id_to_token[current_context_tokens[i]].c_str(), vocab.id_to_token[embd_inp[i-offset_fix]].c_str());
506
525
if (current_context_tokens[i] == embd_inp[i-offset_fix])
507
526
{
508
527
n_past += 1 ;
@@ -520,7 +539,8 @@ void print_tok_vec(std::vector<float> &embd)
520
539
521
540
last_n_tokens.erase (last_n_tokens.begin (), last_n_tokens.begin () + (n_past-old_n_past));
522
541
embd_inp.erase (embd_inp.begin (), embd_inp.begin () + (n_past-old_n_past));
523
-
542
+ // printf("np:%d newembinp: \n",n_past);
543
+ // print_tok_vec(embd_inp);
524
544
}else {
525
545
smartcontext.clear ();
526
546
}
@@ -544,7 +564,8 @@ void print_tok_vec(std::vector<float> &embd)
544
564
int shiftamt = embd_inp.size () * SCTruncationRatio;
545
565
smartcontext = std::vector<int >(embd_inp.begin () + shiftamt, embd_inp.end ());
546
566
printf (" \n [New Smart Context Triggered! Buffered Token Allowance: %d]" ,shiftamt);
547
-
567
+ // printf("smartctx:\n");
568
+ // print_tok_vec(smartcontext,&vocab.id_to_token);
548
569
embd_inp = smartcontext;
549
570
// if max ctx length is exceeded, chop the prompt in half after the start part, and memorize it. The memorized part becomes LCS marker.
550
571
// when a future prompt comes in, find the LCS again. If LCS > a length and LCS starts with memorized LCS
0 commit comments