Skip to content

Commit 91c188d

Browse files
authored
Only use FIM middle token if it exists (#7648)
* Only use FIM middle if it exists
* Only use FIM middle if it exists
1 parent 84f6de1 commit 91c188d

File tree

2 files changed

+17
-3
lines changed

2 files changed

+17
-3
lines changed

examples/infill/infill.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,11 @@ int main(int argc, char ** argv) {
223223
inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model));
224224
embd_inp = inp_pfx;
225225
embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
226-
embd_inp.push_back(llama_token_middle(model));
226+
227+
const llama_token middle_token = llama_token_middle(model);
228+
if (middle_token >= 0) {
229+
embd_inp.push_back(middle_token);
230+
}
227231

228232
LOG("prefix: \"%s\"\n", log_tostr(params.input_prefix));
229233
LOG("suffix: \"%s\"\n", log_tostr(params.input_suffix));
@@ -528,7 +532,12 @@ int main(int argc, char ** argv) {
528532
inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model));
529533
embd_inp = inp_pfx;
530534
embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
531-
embd_inp.push_back(llama_token_middle(model));
535+
536+
const llama_token middle_token = llama_token_middle(model);
537+
if (middle_token >= 0) {
538+
embd_inp.push_back(middle_token);
539+
}
540+
532541
embd.clear();
533542
n_remain = params.n_predict;
534543
n_past = 0;

examples/server/server.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2038,7 +2038,12 @@ struct server_context {
20382038
prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS
20392039
prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(model));
20402040
prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end());
2041-
prefix_tokens.push_back(llama_token_middle(model));
2041+
2042+
const llama_token middle_token = llama_token_middle(model);
2043+
if (middle_token >= 0) {
2044+
prefix_tokens.push_back(middle_token);
2045+
}
2046+
20422047
prompt_tokens = prefix_tokens;
20432048
} else {
20442049
prompt_tokens = tokenize(slot.prompt, system_prompt.empty()); // add BOS if there isn't system prompt

0 commit comments

Comments
 (0)