Skip to content

Commit dc795af

Browse files
kylo5abyarthw
authored and committed
common : remove duplicate function llama_should_add_bos_token (ggml-org#8778)
1 parent bd679bf commit dc795af

File tree

14 files changed

+26
-40
lines changed

14 files changed

+26
-40
lines changed

common/common.cpp

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2702,12 +2702,6 @@ std::string llama_detokenize(llama_context * ctx, const std::vector<llama_token>
27022702
return text;
27032703
}
27042704

2705-
bool llama_should_add_bos_token(const llama_model * model) {
2706-
const int add_bos = llama_add_bos_token(model);
2707-
2708-
return add_bos != -1 ? bool(add_bos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
2709-
}
2710-
27112705
//
27122706
// Chat template utils
27132707
//

common/common.h

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -380,10 +380,6 @@ std::string llama_detokenize(
380380
const std::vector<llama_token> & tokens,
381381
bool special = true);
382382

383-
// Uses the value from the model metadata if possible, otherwise
384-
// defaults to true when model type is SPM, otherwise false.
385-
bool llama_should_add_bos_token(const llama_model * model);
386-
387383
//
388384
// Chat template utils
389385
//

examples/cvector-generator/cvector-generator.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -271,7 +271,7 @@ struct tokenized_prompt {
271271
size_t max_seq_len;
272272

273273
tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) {
274-
const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
274+
const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
275275
tokens_pos = ::llama_tokenize(ctx, pos, add_bos, true);
276276
tokens_neg = ::llama_tokenize(ctx, neg, add_bos, true);
277277
max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());

examples/eval-callback/eval-callback.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -127,7 +127,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
127127
}
128128

129129
static bool run(llama_context * ctx, const gpt_params & params) {
130-
const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
130+
const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
131131

132132
std::vector<llama_token> tokens = ::llama_tokenize(ctx, params.prompt, add_bos);
133133

examples/imatrix/imatrix.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -433,8 +433,8 @@ static void process_logits(
433433
}
434434

435435
static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
436-
const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
437-
GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
436+
const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
437+
GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
438438
const int n_ctx = llama_n_ctx(ctx);
439439

440440
auto tim1 = std::chrono::high_resolution_clock::now();

examples/infill/infill.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -203,8 +203,8 @@ int main(int argc, char ** argv) {
203203
LOG_TEE("\n");
204204
LOG_TEE("%s\n", gpt_params_get_system_info(params).c_str());
205205
}
206-
const bool add_bos = llama_should_add_bos_token(model);
207-
GGML_ASSERT(llama_add_eos_token(model) != 1);
206+
const bool add_bos = llama_add_bos_token(model);
207+
GGML_ASSERT(!llama_add_eos_token(model));
208208
LOG("add_bos: %d\n", add_bos);
209209

210210
std::vector<llama_token> embd_inp;

examples/main/main.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -267,9 +267,9 @@ int main(int argc, char ** argv) {
267267
}
268268
}
269269

270-
const bool add_bos = llama_should_add_bos_token(model);
270+
const bool add_bos = llama_add_bos_token(model);
271271
if (!llama_model_has_encoder(model)) {
272-
GGML_ASSERT(llama_add_eos_token(model) != 1);
272+
GGML_ASSERT(!llama_add_eos_token(model));
273273
}
274274
LOG("add_bos: %d\n", add_bos);
275275

examples/perplexity/perplexity.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -340,8 +340,8 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &
340340
// Output: `perplexity: 13.5106 [114/114]`
341341
// BOS tokens will be added for each chunk before eval
342342

343-
const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
344-
GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
343+
const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
344+
GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
345345

346346
fprintf(stderr, "%s: tokenizing the input ..\n", __func__);
347347

@@ -480,8 +480,8 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
480480
// Output: `perplexity: 13.5106 [114/114]`
481481
// BOS tokens will be added for each chunk before eval
482482

483-
const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
484-
GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
483+
const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
484+
GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
485485

486486
std::ofstream logits_stream;
487487
if (!params.logits_file.empty()) {
@@ -1733,8 +1733,8 @@ static void kl_divergence(llama_context * ctx, const gpt_params & params) {
17331733
const int n_batch = params.n_batch;
17341734
const int num_batches = (n_ctx + n_batch - 1)/n_batch;
17351735
const int nv = 2*((n_vocab + 1)/2) + 4;
1736-
const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
1737-
GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
1736+
const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
1737+
GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
17381738

17391739
std::vector<uint16_t> log_probs_uint16(size_t(n_ctx - 1 - n_ctx/2) * nv);
17401740
std::vector<float> kld_values(size_t(n_ctx - 1 - n_ctx/2)*n_chunk);

examples/server/server.cpp

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -693,9 +693,8 @@ struct server_context {
693693

694694
n_ctx = llama_n_ctx(ctx);
695695

696-
add_bos_token = llama_should_add_bos_token(model);
697-
has_eos_token = llama_add_eos_token(model) != 1;
698-
696+
add_bos_token = llama_add_bos_token(model);
697+
has_eos_token = !llama_add_eos_token(model);
699698
return true;
700699
}
701700

@@ -2038,7 +2037,7 @@ struct server_context {
20382037
slot.t_start_generation = 0;
20392038

20402039
if (slot.infill) {
2041-
const bool add_bos = llama_should_add_bos_token(model);
2040+
const bool add_bos = llama_add_bos_token(model);
20422041
bool suff_rm_leading_spc = true;
20432042
if (params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
20442043
params.input_suffix.erase(0, 1);

examples/tokenize/tokenize.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -362,7 +362,7 @@ int main(int raw_argc, char ** raw_argv) {
362362
prompt = stdin_buffer.str();
363363
}
364364

365-
const bool model_wants_add_bos = llama_should_add_bos_token(model);
365+
const bool model_wants_add_bos = llama_add_bos_token(model);
366366
const bool add_bos = model_wants_add_bos && !no_bos;
367367
const bool parse_special = !no_parse_special;
368368

0 commit comments

Comments
 (0)