ggml-org · ochafik · Sep 29, 2024 · Sep 29, 2024 · Sep 29, 2024 · Sep 29, 2024
diff --git a/common/arg.cpp b/common/arg.cpp
@@ -1879,6 +1879,13 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             params.slot_prompt_similarity = std::stof(value);
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
+    add_opt(llama_arg( // registers the test-only --testing-sampler-delay-millis option
+        {"--testing-sampler-delay-millis"}, "N",
+        format("for tests: delay in milliseconds to add to each sampling (default: %d)", params.testing_sampler_delay_millis),
+        [](gpt_params & params, int value) { // handler: receives the option's integer value
+            params.testing_sampler_delay_millis = value; // stored as-is; no range check is done here
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER})); // tagged with LLAMA_EXAMPLE_SERVER, like the neighbouring options
     add_opt(llama_arg(
         {"--lora-init-without-apply"},
         format("load LoRA adapters without applying them (apply later via POST /lora-adapters) (default: %s)", params.lora_init_without_apply ? "enabled" : "disabled"),

diff --git a/common/common.h b/common/common.h
@@ -299,6 +299,8 @@ struct gpt_params {
 
     float slot_prompt_similarity = 0.5f;
 
+    int testing_sampler_delay_millis = 0; // for tests: delay in milliseconds to add to each sampling (--testing-sampler-delay-millis)
+
     // batched-bench params
     bool is_pp_shared = false;