infill : assert prefix/suffix tokens + remove old space logic (#8351)

ggerganov · web-flow · commit 6f0dbf6ab087 · 2024-07-08T09:34:35.000+03:00
diff --git a/common/log.h b/common/log.h
@@ -630,7 +630,7 @@ inline std::string LOG_TOKENS_TOSTR_PRETTY(const C & ctx, const T & tokens)
     buf << "[ ";
 
     bool first = true;
-    for (const auto &token : tokens)
+    for (const auto & token : tokens)
     {
         if (!first) {
             buf << ", ";
diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp
@@ -204,21 +204,17 @@ int main(int argc, char ** argv) {
     GGML_ASSERT(llama_add_eos_token(model) != 1);
     LOG("add_bos: %d\n", add_bos);
 
-    bool suff_rm_leading_spc = params.escape;
-    if (suff_rm_leading_spc && params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
-        params.input_suffix.erase(0, 1);
-        suff_rm_leading_spc = false;
-    }
     std::vector<llama_token> embd_inp;
     std::vector<llama_token> embd_end;
     std::vector<llama_token> inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false);
     std::vector<llama_token> inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false);
-    const int space_token = 29871;
-    if (suff_rm_leading_spc && inp_sfx[0] == space_token) {
-        inp_sfx.erase(inp_sfx.begin());
-    }
+
+    GGML_ASSERT(llama_token_prefix(model) >= 0);
+    GGML_ASSERT(llama_token_suffix(model) >= 0);
+
     inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(model));
     inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model));
+
     embd_inp = params.spm_infill ? inp_sfx : inp_pfx;
     embd_end = params.spm_infill ? inp_pfx : inp_sfx;
     if (add_bos) {
@@ -516,19 +512,14 @@ int main(int argc, char ** argv) {
                     string_process_escapes(params.input_prefix);
                     string_process_escapes(params.input_suffix);
                 }
-                suff_rm_leading_spc = params.escape;
-                if (suff_rm_leading_spc && params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
-                    params.input_suffix.erase(0, 1);
-                    suff_rm_leading_spc = false;
-                }
+
                 // tokenize new prefix and suffix
                 std::vector<llama_token> inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false);
                 std::vector<llama_token> inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false);
-                if (suff_rm_leading_spc && inp_sfx[0] == space_token) {
-                    inp_sfx.erase(inp_sfx.begin());
-                }
+
                 inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(model));
                 inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model));
+
                 embd_inp = params.spm_infill ? inp_sfx : inp_pfx;
                 embd_end = params.spm_infill ? inp_pfx : inp_sfx;
                 if (add_bos) {

Original file line number	Diff line number	Diff line change
`@@ -630,7 +630,7 @@ inline std::string LOG_TOKENS_TOSTR_PRETTY(const C & ctx, const T & tokens)`
`630`	`630`	`buf << "[ ";`
`631`	`631`
`632`	`632`	`bool first = true;`
`633`		`- for (const auto &token : tokens)`
	`633`	`+ for (const auto & token : tokens)`
`634`	`634`	`{`
`635`	`635`	`if (!first) {`
`636`	`636`	`buf << ", ";`