Skip to content

Commit 79a8176

Browse files
committed
server : add "tokens" output
ggml-ci
1 parent 08ea539 commit 79a8176

File tree

1 file changed

+22
-5
lines changed

1 file changed

+22
-5
lines changed

examples/server/server.cpp

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -468,7 +468,10 @@ struct completion_token_output {
468468

469469
struct server_task_result_cmpl_final : server_task_result {
470470
int index = 0;
471-
std::string content;
471+
472+
std::string content;
473+
llama_tokens tokens;
474+
472475
bool stream;
473476
result_timings timings;
474477
std::string prompt;
@@ -510,6 +513,7 @@ struct server_task_result_cmpl_final : server_task_result {
510513
json res = json {
511514
{"index", index},
512515
{"content", stream ? "" : content}, // in stream mode, content is already in last partial chunk
516+
{"tokens", stream ? llama_tokens {} : tokens},
513517
{"id_slot", id_slot},
514518
{"stop", true},
515519
{"model", oaicompat_model},
@@ -541,7 +545,8 @@ struct server_task_result_cmpl_final : server_task_result {
541545
{"index", 0},
542546
{"message", json{
543547
{"content", content},
544-
{"role", "assistant"}
548+
{"tokens", tokens},
549+
{"role", "assistant"}
545550
}
546551
}}});
547552

@@ -605,7 +610,9 @@ struct server_task_result_cmpl_final : server_task_result {
605610

606611
struct server_task_result_cmpl_partial : server_task_result {
607612
int index = 0;
608-
std::string content;
613+
614+
std::string content;
615+
llama_tokens tokens;
609616

610617
int32_t n_decoded;
611618
int32_t n_prompt_tokens;
@@ -637,6 +644,7 @@ struct server_task_result_cmpl_partial : server_task_result {
637644
json res = json {
638645
{"index", index},
639646
{"content", content},
647+
{"tokens", tokens},
640648
{"stop", false},
641649
{"id_slot", id_slot},
642650
{"tokens_predicted", n_decoded},
@@ -679,7 +687,8 @@ struct server_task_result_cmpl_partial : server_task_result {
679687
{"choices", json::array({json{{"finish_reason", nullptr},
680688
{"index", 0},
681689
{"delta", json{
682-
{"content", content}}}
690+
{"content", content},
691+
{"tokens", tokens}}}
683692
}})},
684693
{"created", t},
685694
{"id", oaicompat_cmpl_id},
@@ -695,6 +704,7 @@ struct server_task_result_cmpl_partial : server_task_result {
695704
{"delta",
696705
json{
697706
{"content", content},
707+
{"tokens", tokens}
698708
}},
699709
}});
700710
}
@@ -949,8 +959,11 @@ struct server_slot {
949959

950960
size_t last_nl_pos = 0;
951961

952-
std::string generated_text;
962+
std::string generated_text;
963+
llama_tokens generated_tokens;
964+
953965
llama_tokens cache_tokens;
966+
954967
std::vector<completion_token_output> generated_token_probs;
955968

956969
bool has_next_token = true;
@@ -985,6 +998,7 @@ struct server_slot {
985998
n_prompt_tokens = 0;
986999
last_nl_pos = 0;
9871000
generated_text = "";
1001+
generated_tokens = {};
9881002
has_new_line = false;
9891003
truncated = false;
9901004
stop = STOP_TYPE_NONE;
@@ -1736,6 +1750,7 @@ struct server_context {
17361750

17371751
// search stop word and delete it
17381752
slot.generated_text += token_str;
1753+
slot.generated_tokens.push_back(result.tok);
17391754
slot.has_next_token = true;
17401755

17411756
// check if there is incomplete UTF-8 character at the end
@@ -1912,6 +1927,7 @@ struct server_context {
19121927
res->id = slot.id_task;
19131928
res->index = slot.index;
19141929
res->content = tkn.text_to_send;
1930+
res->tokens = { tkn.tok };
19151931

19161932
res->n_decoded = slot.n_decoded;
19171933
res->n_prompt_tokens = slot.n_prompt_tokens;
@@ -1952,6 +1968,7 @@ struct server_context {
19521968

19531969
res->index = slot.index;
19541970
res->content = slot.generated_text;
1971+
res->tokens = slot.generated_tokens;
19551972
res->timings = slot.get_timings();
19561973
res->prompt = common_detokenize(ctx, slot.prompt_tokens, true);
19571974

0 commit comments

Comments (0)