Prepare to reinstate detailed smartcontext debug output during inference

Nexesenex · Nexesenex · commit b01e9ecc834c · 2024-07-08T06:29:09.000+02:00
The goal is to understand how smartcontext works by testing it.
diff --git a/model_adapter.cpp b/model_adapter.cpp
@@ -427,6 +427,10 @@ void print_tok_vec(std::vector<float> &embd)
      const float SCTruncationRatio = 0.5; //ratio for how many tokens to fast forward
      const int SCTokThreshold = 32 + (nctx*0.05); //how many tokens of similarity triggers smartcontext
 
+    // printf("\nORIGINAL CTX:\n");
+    // print_tok_vec(current_context_tokens);
+    // printf("\nORIGINAL EMBD:\n");
+    // print_tok_vec(embd_inp);
 
     //fast forward the past based on identical tokens, stop once a divergence is noted
     int embd_inp_len = embd_inp.size();
@@ -474,6 +478,10 @@ void print_tok_vec(std::vector<float> &embd)
         last_n_tokens.erase(last_n_tokens.begin(), last_n_tokens.begin() + n_past);
         embd_inp.erase(embd_inp.begin(), embd_inp.begin() + n_past);
         embd_inp_len = embd_inp.size();
+		
+        // printf("\nconds: %d %d %d\n",current_context_tokens.size() >= nctx*0.8
+        // ,embd_inp_len >= nctx*0.6 ,current_context_tokens.size() - n_past > nctx*0.5);
+        // printf("csiz:%d par:%d eilen:%d np:%d",current_context_tokens.size(), (int)(nctx*0.8),embd_inp_len,n_past);
     }
 
     //smart context mode, detect if we have a shifted context at max length
@@ -482,7 +490,15 @@ void print_tok_vec(std::vector<float> &embd)
 
     if (fastforwardok && useSmartContext && smartcontext.size() > 0 && embd_inp_len >= SCInpLenThreshold)
     {
+        // printf("curfullcontext:\n");
+        // print_tok_vec(current_context_tokens);
+
         //see if smartcontext is still usable
+        // printf("smartctx:\n");
+        // print_tok_vec(smartcontext);
+        // printf("embinp:\n");
+        // print_tok_vec(embd_inp);
+		
         auto shared = LongestCommonSubseq(smartcontext, embd_inp);
         if (shared.size() > SCTokThreshold && ArrStartWith(smartcontext, shared)) //at least 32 tokens in common
         {
@@ -492,6 +508,8 @@ void print_tok_vec(std::vector<float> &embd)
                 auto trimmed = std::vector<int>(embd_inp.begin() + found, embd_inp.end());
                 embd_inp = trimmed;
                 embd_inp_len = embd_inp.size();
+                // printf("trimmed:\n");
+                // print_tok_vec(embd_inp,&vocab.id_to_token);
                 printf("\n[Reusing Smart Context: %d allowance remaining]", found);
 
                 int old_n_past = n_past;
@@ -503,6 +521,7 @@ void print_tok_vec(std::vector<float> &embd)
 
                 for (int i = n_past; i < current_context_tokens.size(); ++i)
                 {
+                    // printf("\n%s and %s\n",vocab.id_to_token[current_context_tokens[i]].c_str(), vocab.id_to_token[embd_inp[i-offset_fix]].c_str());
                     if (current_context_tokens[i] == embd_inp[i-offset_fix])
                     {
                         n_past += 1;
@@ -520,7 +539,8 @@ void print_tok_vec(std::vector<float> &embd)
 
                 last_n_tokens.erase(last_n_tokens.begin(), last_n_tokens.begin() + (n_past-old_n_past));
                 embd_inp.erase(embd_inp.begin(), embd_inp.begin() + (n_past-old_n_past));
-
+                // printf("np:%d newembinp: \n",n_past);
+                // print_tok_vec(embd_inp);
             }else{
                 smartcontext.clear();
             }
@@ -544,7 +564,8 @@ void print_tok_vec(std::vector<float> &embd)
         int shiftamt = embd_inp.size() * SCTruncationRatio;
         smartcontext = std::vector<int>(embd_inp.begin() + shiftamt, embd_inp.end());
          printf("\n[New Smart Context Triggered! Buffered Token Allowance: %d]",shiftamt);
-
+        // printf("smartctx:\n");
+        // print_tok_vec(smartcontext,&vocab.id_to_token);
         embd_inp = smartcontext;
         //if max ctx length is exceeded, chop the prompt in half after the start part, and memorize it. The memorized part becomes LCS marker.
         //when a future prompt comes in, find the LCS again. If LCS > a length and LCS starts with memorized LCS