@@ -341,7 +341,7 @@ struct server_slot {
341
341
};
342
342
343
343
struct server_metrics {
344
- const int64_t t_start = ggml_time_us() ;
344
+ int64_t t_start = 0 ;
345
345
346
346
uint64_t n_prompt_tokens_processed_total = 0 ;
347
347
uint64_t t_prompt_processing_total = 0 ;
@@ -354,14 +354,18 @@ struct server_metrics {
354
354
uint64_t n_tokens_predicted = 0 ;
355
355
uint64_t t_tokens_generation = 0 ;
356
356
357
- void on_prompt_eval (const server_slot &slot) {
357
// Stores the current value of ggml_time_us() into t_start.
// This replaces the former in-class initializer, so t_start stays 0 until init() is called.
// NOTE(review): presumably a microsecond timestamp, judging by the helper's name; confirm against ggml.
+ void init () {
358
+ t_start = ggml_time_us ();
359
+ }
360
+
361
// Accumulates one slot's prompt-evaluation statistics into the metrics.
// slot.n_prompt_tokens_processed is added to both the *_total counter and its non-total
// counterpart; slot.t_prompt_processing is added to both timing counters likewise.
// NOTE(review): presumably the non-total counters are reset elsewhere while the *_total
// ones only grow; that is not visible in this fragment, so confirm.
+ void on_prompt_eval (const server_slot & slot) {
358
362
n_prompt_tokens_processed_total += slot.n_prompt_tokens_processed ;
359
363
n_prompt_tokens_processed += slot.n_prompt_tokens_processed ;
360
364
t_prompt_processing += slot.t_prompt_processing ;
361
365
t_prompt_processing_total += slot.t_prompt_processing ;
362
366
}
363
367
364
- void on_prediction (const server_slot &slot) {
368
+ void on_prediction (const server_slot & slot) {
365
369
n_tokens_predicted_total += slot.n_decoded ;
366
370
n_tokens_predicted += slot.n_decoded ;
367
371
t_tokens_generation += slot.t_token_generation ;
@@ -690,10 +694,11 @@ struct server_context {
690
694
return res > 0 ;
691
695
}
692
696
693
- void initialize () {
697
+ void init () {
694
698
const int32_t n_ctx_slot = n_ctx / params.n_parallel ;
695
699
696
700
LOG_INFO (" initializing slots" , {{" n_slots" , params.n_parallel }});
701
+
697
702
for (int i = 0 ; i < params.n_parallel ; i++) {
698
703
server_slot slot;
699
704
@@ -735,6 +740,8 @@ struct server_context {
735
740
default_generation_settings_for_props[" seed" ] = -1 ;
736
741
737
742
batch = llama_batch_init (n_ctx, 0 , params.n_parallel );
743
+
744
+ metrics.init ();
738
745
}
739
746
740
747
std::vector<llama_token> tokenize (const json & json_prompt, bool add_bos) const {
@@ -2783,7 +2790,7 @@ int main(int argc, char ** argv) {
2783
2790
state.store (SERVER_STATE_ERROR);
2784
2791
return 1 ;
2785
2792
} else {
2786
- ctx_server.initialize ();
2793
+ ctx_server.init ();
2787
2794
state.store (SERVER_STATE_READY);
2788
2795
}
2789
2796
0 commit comments