
Commit faf69d4

llama : sanitize invalid tokens (ggml-org#9357)
* common : do not add null tokens during warmup (ggml-ci)
* llama : check that the input tokens are valid (ggml-ci)
* tests : fix batch size of bert model (ggml-ci)
1 parent e536426 commit faf69d4

File tree: 3 files changed (+26, -4 lines)

common/common.cpp

Lines changed: 7 additions & 2 deletions

@@ -2690,10 +2690,15 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         llama_token bos = llama_token_bos(model);
         llama_token eos = llama_token_eos(model);
         // some models (e.g. T5) don't have a BOS token
-        if (bos != -1) {
+        if (bos != LLAMA_TOKEN_NULL) {
             tmp.push_back(bos);
         }
-        tmp.push_back(eos);
+        if (eos != LLAMA_TOKEN_NULL) {
+            tmp.push_back(eos);
+        }
+        if (tmp.empty()) {
+            tmp.push_back(0);
+        }
 
         if (llama_model_has_encoder(model)) {
             llama_encode(lctx, llama_batch_get_one(tmp.data(), tmp.size(), 0, 0));
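
For context, here is a minimal standalone sketch of the warmup token selection after this change. It only uses API that appears in the diff (llama_token_bos, llama_token_eos, LLAMA_TOKEN_NULL); the helper name collect_warmup_tokens is hypothetical, not part of the library.

#include <vector>
#include "llama.h"

// Hypothetical helper mirroring the warmup logic above: skip special tokens
// the model does not define, and fall back to token 0 so the warmup batch is
// never empty.
static std::vector<llama_token> collect_warmup_tokens(const llama_model * model) {
    std::vector<llama_token> tmp;

    const llama_token bos = llama_token_bos(model);
    const llama_token eos = llama_token_eos(model);

    if (bos != LLAMA_TOKEN_NULL) {
        tmp.push_back(bos);
    }
    if (eos != LLAMA_TOKEN_NULL) {
        tmp.push_back(eos);
    }
    if (tmp.empty()) {
        tmp.push_back(0); // some models (e.g. T5) define neither BOS nor EOS
    }

    return tmp;
}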

examples/server/tests/features/embeddings.feature

Lines changed: 5 additions & 2 deletions

@@ -9,8 +9,11 @@ Feature: llama.cpp server
     And a model alias bert-bge-small
     And 42 as server seed
     And 2 slots
-    And 1024 as batch size
-    And 1024 as ubatch size
+    # the bert-bge-small model has context size of 512
+    # since the generated prompts are as big as the batch size, we need to set the batch size to 512
+    # ref: https://huggingface.co/BAAI/bge-small-en-v1.5/blob/5c38ec7c405ec4b44b94cc5a9bb96e735b38267a/config.json#L20
+    And 512 as batch size
+    And 512 as ubatch size
     And 2048 KV cache size
     And embeddings extraction
     Then the server is starting
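
A caller that wants to avoid this kind of mismatch programmatically could derive the batch size from the model instead of hard-coding it. A minimal sketch, assuming the llama_n_ctx_train accessor; the helper name clamp_batch_size is hypothetical:

#include <algorithm>
#include <cstdint>
#include "llama.h"

// Hypothetical helper: cap the requested batch size at the model's training
// context, so prompts generated at batch-size length still fit the context
// window (512 for bert-bge-small, per the config referenced above).
static uint32_t clamp_batch_size(const llama_model * model, uint32_t requested) {
    const int32_t n_ctx_train = llama_n_ctx_train(model);
    return std::min<uint32_t>(requested, (uint32_t) n_ctx_train);
}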

src/llama.cpp

Lines changed: 14 additions & 0 deletions

@@ -16066,6 +16066,13 @@ static int llama_decode_internal(
         return -1;
     }
 
+    for (uint32_t i = 0; i < n_tokens_all; ++i) {
+        if (batch_all.token[i] < 0) {
+            LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch_all.token[i]);
+            return -1;
+        }
+    }
+
     const auto & model   = lctx.model;
     const auto & hparams = model.hparams;
     const auto & cparams = lctx.cparams;

@@ -16358,6 +16365,13 @@ static int llama_encode_internal(
         return -1;
     }
 
+    for (uint32_t i = 0; i < n_tokens; ++i) {
+        if (batch.token[i] < 0) {
+            LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch.token[i]);
+            return -1;
+        }
+    }
+
     const auto & model   = lctx.model;
     const auto & hparams = model.hparams;
     const auto & cparams = lctx.cparams;
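
The checks added above reject negative token ids inside llama_decode_internal and llama_encode_internal. A caller can additionally validate a batch up front, including ids at or above the vocabulary size. A minimal sketch, assuming the public llama_get_model and llama_n_vocab accessors; the helper name batch_tokens_are_valid is hypothetical:

#include <cstdio>
#include "llama.h"

// Hypothetical caller-side check: token ids must lie in [0, n_vocab).
// Assumes a token batch (batch.token != nullptr, no embeddings input).
// Note that the internal check added in this commit only rejects negative ids.
static bool batch_tokens_are_valid(const llama_context * ctx, const llama_batch & batch) {
    const int32_t n_vocab = llama_n_vocab(llama_get_model(ctx));

    for (int32_t i = 0; i < batch.n_tokens; ++i) {
        if (batch.token[i] < 0 || batch.token[i] >= n_vocab) {
            fprintf(stderr, "invalid token[%d] = %d\n", i, batch.token[i]);
            return false;
        }
    }

    return true;
}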
