Display detailed smartcontext info during inference

Nexesenex · Nexesenex · commit aad7450c419f · 2024-07-08T06:29:09.000+02:00
Fix compilation of the added smartcontext info
diff --git a/model_adapter.cpp b/model_adapter.cpp
@@ -14,6 +14,8 @@
 
 #include <chrono>
 
+//static int debugmode;
+
 static auto bench_timer = std::chrono::high_resolution_clock().now();
 
 void timer_start()
@@ -427,10 +429,10 @@ void print_tok_vec(std::vector<float> &embd)
      const float SCTruncationRatio = 0.5; //ratio for how many tokens to fast forward
      const int SCTokThreshold = 32 + (nctx*0.05); //how many tokens of similarity triggers smartcontext
 
-    // printf("\nORIGINAL CTX:\n");
-    // print_tok_vec(current_context_tokens);
-    // printf("\nORIGINAL EMBD:\n");
-    // print_tok_vec(embd_inp);
+    //printf("\nORIGINAL CTX:\n");
+    //print_tok_vec(current_context_tokens);
+    //printf("\nORIGINAL EMBD:\n");
+    //print_tok_vec(embd_inp);
 
     //fast forward the past based on identical tokens, stop once a divergence is noted
     int embd_inp_len = embd_inp.size();
@@ -479,9 +481,9 @@ void print_tok_vec(std::vector<float> &embd)
         embd_inp.erase(embd_inp.begin(), embd_inp.begin() + n_past);
         embd_inp_len = embd_inp.size();
 		
-        // printf("\nconds: %d %d %d\n",current_context_tokens.size() >= nctx*0.8
-        // embd_inp_len >= nctx*0.6 ,current_context_tokens.size() - n_past > nctx*0.5);
-        // printf("csiz:%d par:%d eilen:%d np:%d",current_context_tokens.size(), (int)(nctx*0.8),embd_inp_len,n_past);
+        printf("\nconds: %d %d %d\n",(int)(current_context_tokens.size() >= nctx*0.8), //debug: pass all three conditions as real arguments, one per %d
+            (int)(embd_inp_len >= nctx*0.6),(int)(current_context_tokens.size() - n_past > nctx*0.5));
+        printf("csiz:%d par:%d eilen:%d np:%d",(int)current_context_tokens.size(), (int)(nctx*0.8),embd_inp_len,n_past); //size() is size_t, cast so it matches %d
     }
 
     //smart context mode, detect if we have a shifted context at max length
@@ -490,14 +492,14 @@ void print_tok_vec(std::vector<float> &embd)
 
     if (fastforwardok && useSmartContext && smartcontext.size() > 0 && embd_inp_len >= SCInpLenThreshold)
     {
-        // printf("curfullcontext:\n");
-        // print_tok_vec(current_context_tokens);
+        //printf("curfullcontext:\n");
+        //print_tok_vec(current_context_tokens);
 
         //see if smartcontext is still usable
-        // printf("smartctx:\n");
-        // print_tok_vec(smartcontext);
-        // printf("embinp:\n");
-        // print_tok_vec(embd_inp);
+        //printf("smartctx:\n");
+        //if(debugmode==1) {print_tok_vec(smartcontext);}
+        //printf("embinp:\n");
+        //if(debugmode==1) {print_tok_vec(embd_inp);}
 		
         auto shared = LongestCommonSubseq(smartcontext, embd_inp);
         if (shared.size() > SCTokThreshold && ArrStartWith(smartcontext, shared)) //at least 32 tokens in common
@@ -508,8 +510,8 @@ void print_tok_vec(std::vector<float> &embd)
                 auto trimmed = std::vector<int>(embd_inp.begin() + found, embd_inp.end());
                 embd_inp = trimmed;
                 embd_inp_len = embd_inp.size();
-                // printf("trimmed:\n");
-                // print_tok_vec(embd_inp,&vocab.id_to_token);
+                //printf("trimmed:\n");
+                //if(debugmode==1) {print_tok_vec(embd_inp);}
                 printf("\n[Reusing Smart Context: %d allowance remaining]", found);
 
                 int old_n_past = n_past;
@@ -521,7 +523,7 @@ void print_tok_vec(std::vector<float> &embd)
 
                 for (int i = n_past; i < current_context_tokens.size(); ++i)
                 {
-                    // printf("\n%s and %s\n",vocab.id_to_token[current_context_tokens[i]].c_str(), vocab.id_to_token[embd_inp[i-offset_fix]].c_str());
+                    printf("\n%d and %d\n",(int)current_context_tokens[i], (int)embd_inp[i-offset_fix]); //token ids are integers; %s would treat them as char* (undefined behaviour)
                     if (current_context_tokens[i] == embd_inp[i-offset_fix])
                     {
                         n_past += 1;
@@ -539,8 +541,8 @@ void print_tok_vec(std::vector<float> &embd)
 
                 last_n_tokens.erase(last_n_tokens.begin(), last_n_tokens.begin() + (n_past-old_n_past));
                 embd_inp.erase(embd_inp.begin(), embd_inp.begin() + (n_past-old_n_past));
-                // printf("np:%d newembinp: \n",n_past);
-                // print_tok_vec(embd_inp);
+                //printf("np:%d newembinp: \n",n_past);
+                //if (debugmode==1) {print_tok_vec(embd_inp);}
             }else{
                 smartcontext.clear();
             }
@@ -563,9 +565,9 @@ void print_tok_vec(std::vector<float> &embd)
         //determine longest common substring after removing start part
         int shiftamt = embd_inp.size() * SCTruncationRatio;
         smartcontext = std::vector<int>(embd_inp.begin() + shiftamt, embd_inp.end());
-         printf("\n[New Smart Context Triggered! Buffered Token Allowance: %d]",shiftamt);
-        // printf("smartctx:\n");
-        // print_tok_vec(smartcontext,&vocab.id_to_token);
+        printf("\n[New Smart Context Triggered! Buffered Token Allowance: %d]",shiftamt);
+        //printf("smartctx:\n");
+        //if(debugmode==1) {print_tok_vec(smartcontext);}
         embd_inp = smartcontext;
         //if max ctx length is exceeded, chop the prompt in half after the start part, and memorize it. The memorized part becomes LCS marker.
         //when a future prompt comes in, find the LCS again. If LCS > a length and LCS starts with memorized LCS