
Commit 958660b

server: allow to specify tokens as strings in logit_bias
1 parent 3856668 commit 958660b

2 files changed: +26 -8 lines changed


examples/server/README.md

Lines changed: 1 addition & 1 deletion
@@ -168,7 +168,7 @@ node index.js
 
 `ignore_eos`: Ignore end of stream token and continue generating (default: false).
 
-`logit_bias`: Modify the likelihood of a token appearing in the generated text completion. For example, use `"logit_bias": [[15043,1.0]]` to increase the likelihood of the token 'Hello', or `"logit_bias": [[15043,-1.0]]` to decrease its likelihood. Setting the value to false, `"logit_bias": [[15043,false]]` ensures that the token `Hello` is never produced (default: []).
+`logit_bias`: Modify the likelihood of a token appearing in the generated text completion. For example, use `"logit_bias": [[15043,1.0]]` to increase the likelihood of the token 'Hello', or `"logit_bias": [[15043,-1.0]]` to decrease its likelihood. Setting the value to false, `"logit_bias": [[15043,false]]` ensures that the token `Hello` is never produced. The tokens can also be represented as strings, e.g. `[["Hello, World!",false]]` will ban all tokens that represent the string `Hello, World!` (default: []).
 
 `n_probs`: If greater than 0, the response also contains the probabilities of top N tokens for each generated token (default: 0)
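
As a usage illustration (not part of this commit's diff): with this change a completion request can mix both forms in one `logit_bias` array. The sketch below is hypothetical; the prompt, `n_predict`, and the token id `15043` (the 'Hello' token from the README example) are only assumed for demonstration.

```json
{
  "prompt": "Building a website can be done in 10 simple steps:",
  "n_predict": 64,
  "logit_bias": [
    [15043, 2.0],
    ["Hello, World!", false]
  ]
}
```

A string entry is tokenized by the server, and every resulting token id receives the given bias; a value of `false` bans those tokens entirely.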

examples/server/server.cpp

Lines changed: 25 additions & 7 deletions
@@ -828,18 +828,36 @@ struct llama_server_context
             const int n_vocab = llama_n_vocab(model);
             for (const auto &el : *logit_bias)
             {
-                if (el.is_array() && el.size() == 2 && el[0].is_number_integer())
+                if (el.is_array() && el.size() == 2)
                 {
-                    llama_token tok = el[0].get<llama_token>();
-                    if (tok >= 0 && tok < n_vocab)
+                    float bias;
+                    if (el[1].is_number())
                     {
-                        if (el[1].is_number())
+                        bias = el[1].get<float>();
+                    }
+                    else if (el[1].is_boolean() && !el[1].get<bool>())
+                    {
+                        bias = -INFINITY;
+                    }
+                    else
+                    {
+                        continue;
+                    }
+
+                    if(el[0].is_number_integer())
+                    {
+                        llama_token tok = el[0].get<llama_token>();
+                        if (tok >= 0 && tok < n_vocab)
                         {
-                            slot->sparams.logit_bias[tok] = el[1].get<float>();
+                            slot->sparams.logit_bias[tok] = bias;
                         }
-                        else if (el[1].is_boolean() && !el[1].get<bool>())
+                    }
+                    else if (el[0].is_string())
+                    {
+                        auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
+                        for(auto tok : toks)
                         {
-                            slot->sparams.logit_bias[tok] = -INFINITY;
+                            slot->sparams.logit_bias[tok] = bias;
                         }
                     }
                 }
