Commit 5350fb5

ggerganov authored and jordankanter committed
server : fix metrics init (ggml-org#5964)
1 parent 8201ef3 commit 5350fb5

File tree

1 file changed: +12 −5 lines changed


examples/server/server.cpp

Lines changed: 12 additions & 5 deletions
@@ -341,7 +341,7 @@ struct server_slot {
 };
 
 struct server_metrics {
-    const int64_t t_start = ggml_time_us();
+    int64_t t_start = 0;
 
     uint64_t n_prompt_tokens_processed_total = 0;
     uint64_t t_prompt_processing_total = 0;
@@ -354,14 +354,18 @@ struct server_metrics {
     uint64_t n_tokens_predicted = 0;
     uint64_t t_tokens_generation = 0;
 
-    void on_prompt_eval(const server_slot &slot) {
+    void init() {
+        t_start = ggml_time_us();
+    }
+
+    void on_prompt_eval(const server_slot & slot) {
         n_prompt_tokens_processed_total += slot.n_prompt_tokens_processed;
         n_prompt_tokens_processed += slot.n_prompt_tokens_processed;
         t_prompt_processing += slot.t_prompt_processing;
         t_prompt_processing_total += slot.t_prompt_processing;
     }
 
-    void on_prediction(const server_slot &slot) {
+    void on_prediction(const server_slot & slot) {
         n_tokens_predicted_total += slot.n_decoded;
         n_tokens_predicted += slot.n_decoded;
         t_tokens_generation += slot.t_token_generation;
@@ -690,10 +694,11 @@ struct server_context {
         return res > 0;
     }
 
-    void initialize() {
+    void init() {
         const int32_t n_ctx_slot = n_ctx / params.n_parallel;
 
         LOG_INFO("initializing slots", {{"n_slots", params.n_parallel}});
+
         for (int i = 0; i < params.n_parallel; i++) {
             server_slot slot;
 
@@ -735,6 +740,8 @@ struct server_context {
         default_generation_settings_for_props["seed"] = -1;
 
         batch = llama_batch_init(n_ctx, 0, params.n_parallel);
+
+        metrics.init();
     }
 
     std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const {
@@ -2783,7 +2790,7 @@ int main(int argc, char ** argv) {
             state.store(SERVER_STATE_ERROR);
             return 1;
         } else {
-            ctx_server.initialize();
+            ctx_server.init();
             state.store(SERVER_STATE_READY);
         }
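
The gist of the change: `t_start` was previously set by an in-class initializer (`const int64_t t_start = ggml_time_us();`), so the timestamp was captured whenever the `server_metrics` object was constructed, before server startup had finished. The fix makes the field a plain mutable member and adds an explicit `init()` that `server_context::init()` calls once the slots and batch are set up, so time-based metrics presumably measure from the point the server is actually ready (and `ggml_time_us()` is no longer called during early object construction). Below is a minimal standalone sketch of the two patterns, not the llama.cpp code: `time_us()` is a stand-in for `ggml_time_us()`, and `metrics_before`/`metrics_after` are hypothetical names for illustration.

#include <chrono>
#include <cstdint>
#include <cstdio>
#include <thread>

// Stand-in for ggml_time_us(): microseconds on a monotonic clock.
static int64_t time_us() {
    using namespace std::chrono;
    return duration_cast<microseconds>(steady_clock::now().time_since_epoch()).count();
}

// Old pattern: the timestamp is captured when the object is constructed.
struct metrics_before {
    const int64_t t_start = time_us();
};

// New pattern: the timestamp is captured only when the owner says startup is done.
struct metrics_after {
    int64_t t_start = 0;

    void init() {
        t_start = time_us();
    }
};

int main() {
    metrics_before before; // t_start is already fixed here
    metrics_after  after;  // t_start is still 0

    // Pretend to do heavy startup work (e.g. loading a model).
    std::this_thread::sleep_for(std::chrono::milliseconds(200));

    after.init(); // start the clock only now

    // Pretend to serve requests for a while.
    std::this_thread::sleep_for(std::chrono::milliseconds(100));

    const int64_t now = time_us();
    std::printf("uptime, constructor-time start: ~%lld ms\n", (long long)((now - before.t_start) / 1000)); // ~300
    std::printf("uptime, explicit init() start:  ~%lld ms\n", (long long)((now - after.t_start) / 1000));  // ~100
    return 0;
}

With the constructor-time start, any duration computed against `t_start` silently includes startup work; the explicit `init()` puts the choice of "time zero" under the caller's control, which is why the commit also renames `server_context::initialize()` to `init()` and calls `metrics.init()` at its end.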
