From d9157cdf345bc0b878d20416044508f000a6d61b Mon Sep 17 00:00:00 2001
From: ManniX-ITA <20623405+mann1x@users.noreply.github.com>
Date: Wed, 17 Apr 2024 18:55:09 +0200
Subject: [PATCH 1/5] Update server.cpp example with correct startup sequence

The HTTP listener start and the health API endpoint are moved before the
model loading starts, so the server can correctly report that it is
loading the model.

---
 examples/server/server.cpp | 202 +++++++++++++++++++------------------
 1 file changed, 103 insertions(+), 99 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 634e653ada284..6dcdc7c8902f7 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -3009,6 +3009,108 @@ int main(int argc, char ** argv) {
         log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
     }

+    const auto handle_health = [&](const httplib::Request & req, httplib::Response & res) {
+        server_state current_state = state.load();
+        switch (current_state) {
+            case SERVER_STATE_READY:
+                {
+                    // request slots data using task queue
+                    server_task task;
+                    task.id = ctx_server.queue_tasks.get_new_id();
+                    task.type = SERVER_TASK_TYPE_METRICS;
+                    task.id_target = -1;
+
+                    ctx_server.queue_results.add_waiting_task_id(task.id);
+                    ctx_server.queue_tasks.post(task);
+
+                    // get the result
+                    server_task_result result = ctx_server.queue_results.recv(task.id);
+                    ctx_server.queue_results.remove_waiting_task_id(task.id);
+
+                    const int n_idle_slots = result.data["idle"];
+                    const int n_processing_slots = result.data["processing"];
+
+                    json health = {
+                        {"status", "ok"},
+                        {"slots_idle", n_idle_slots},
+                        {"slots_processing", n_processing_slots}
+                    };
+
+                    res.status = 200; // HTTP OK
+                    if (sparams.slots_endpoint && req.has_param("include_slots")) {
+                        health["slots"] = result.data["slots"];
+                    }
+
+                    if (n_idle_slots == 0) {
+                        health["status"] = "no slot available";
+                        if (req.has_param("fail_on_no_slot")) {
+                            res.status = 503; // HTTP Service Unavailable
+                        }
+                    }
+
+                    res.set_content(health.dump(), "application/json");
+                    break;
+                }
+            case SERVER_STATE_LOADING_MODEL:
+                {
+                    res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
+                } break;
+            case SERVER_STATE_ERROR:
+                {
+                    res_error(res, format_error_response("Model failed to load", ERROR_TYPE_SERVER));
+                } break;
+        }
+    };
+
+    // register Health API routes
+    svr->Get ("/health", handle_health);
+
+    auto handle_static_file = [](unsigned char * content, size_t len, const char * mime_type) {
+        return [content, len, mime_type](const httplib::Request &, httplib::Response & res) {
+            res.set_content(reinterpret_cast<const char *>(content), len, mime_type);
+            return false;
+        };
+    };
+
+    //
+    // Router
+    //
+
+    // register static assets routes
+    if (!sparams.public_path.empty()) {
+        // Set the base directory for serving static files
+        svr->set_base_dir(sparams.public_path);
+    }
+
+    // using embedded static files
+    svr->Get("/", handle_static_file(index_html, index_html_len, "text/html; charset=utf-8"));
+    svr->Get("/index.js", handle_static_file(index_js, index_js_len, "text/javascript; charset=utf-8"));
+    svr->Get("/completion.js", handle_static_file(completion_js, completion_js_len, "text/javascript; charset=utf-8"));
+    svr->Get("/json-schema-to-grammar.mjs", handle_static_file(
+        json_schema_to_grammar_mjs, json_schema_to_grammar_mjs_len, "text/javascript; charset=utf-8"));
+
+    //
+    // Start the server
+    //
+    if (sparams.n_threads_http < 1) {
+        // +2 threads for monitoring endpoints
+        sparams.n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1);
+    }
+    log_data["n_threads_http"] = std::to_string(sparams.n_threads_http);
+    svr->new_task_queue = [&sparams] { return new httplib::ThreadPool(sparams.n_threads_http); };
+
+    LOG_INFO("HTTP server listening", log_data);
+
+    // run the HTTP server in a thread - see comment below
+    std::thread t([&]() {
+        if (!svr->listen_after_bind()) {
+            state.store(SERVER_STATE_ERROR);
+            return 1;
+        }
+
+        return 0;
+    });
+
     // load the model
     if (!ctx_server.load_model(params)) {
         state.store(SERVER_STATE_ERROR);
@@ -3110,59 +3212,6 @@ int main(int argc, char ** argv) {
     // Route handlers (or controllers)
     //

-    const auto handle_health = [&](const httplib::Request & req, httplib::Response & res) {
-        server_state current_state = state.load();
-        switch (current_state) {
-            case SERVER_STATE_READY:
-                {
-                    // request slots data using task queue
-                    server_task task;
-                    task.id = ctx_server.queue_tasks.get_new_id();
-                    task.type = SERVER_TASK_TYPE_METRICS;
-                    task.id_target = -1;
-
-                    ctx_server.queue_results.add_waiting_task_id(task.id);
-                    ctx_server.queue_tasks.post(task);
-
-                    // get the result
-                    server_task_result result = ctx_server.queue_results.recv(task.id);
-                    ctx_server.queue_results.remove_waiting_task_id(task.id);
-
-                    const int n_idle_slots = result.data["idle"];
-                    const int n_processing_slots = result.data["processing"];
-
-                    json health = {
-                        {"status", "ok"},
-                        {"slots_idle", n_idle_slots},
-                        {"slots_processing", n_processing_slots}
-                    };
-
-                    res.status = 200; // HTTP OK
-                    if (sparams.slots_endpoint && req.has_param("include_slots")) {
-                        health["slots"] = result.data["slots"];
-                    }
-
-                    if (n_idle_slots == 0) {
-                        health["status"] = "no slot available";
-                        if (req.has_param("fail_on_no_slot")) {
-                            res.status = 503; // HTTP Service Unavailable
-                        }
-                    }
-
-                    res.set_content(health.dump(), "application/json");
-                    break;
-                }
-            case SERVER_STATE_LOADING_MODEL:
-                {
-                    res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
-                } break;
-            case SERVER_STATE_ERROR:
-                {
-                    res_error(res, format_error_response("Model failed to load", ERROR_TYPE_SERVER));
-                } break;
-        }
-    };
-
     const auto handle_slots = [&](const httplib::Request &, httplib::Response & res) {
         if (!sparams.slots_endpoint) {
             res_error(res, format_error_response("This server does not support slots endpoint.", ERROR_TYPE_NOT_SUPPORTED));
@@ -3715,32 +3764,8 @@ int main(int argc, char ** argv) {
         return res.set_content(root.dump(), "application/json; charset=utf-8");
     };

-    auto handle_static_file = [](unsigned char * content, size_t len, const char * mime_type) {
-        return [content, len, mime_type](const httplib::Request &, httplib::Response & res) {
-            res.set_content(reinterpret_cast<const char *>(content), len, mime_type);
-            return false;
-        };
-    };
-
-    //
-    // Router
-    //
-
-    // register static assets routes
-    if (!sparams.public_path.empty()) {
-        // Set the base directory for serving static files
-        svr->set_base_dir(sparams.public_path);
-    }
-
-    // using embedded static files
-    svr->Get("/", handle_static_file(index_html, index_html_len, "text/html; charset=utf-8"));
-    svr->Get("/index.js", handle_static_file(index_js, index_js_len, "text/javascript; charset=utf-8"));
-    svr->Get("/completion.js", handle_static_file(completion_js, completion_js_len, "text/javascript; charset=utf-8"));
-    svr->Get("/json-schema-to-grammar.mjs", handle_static_file(
-        json_schema_to_grammar_mjs, json_schema_to_grammar_mjs_len, "text/javascript; charset=utf-8"));
-
     // register API routes
-    svr->Get ("/health", handle_health);
     svr->Get ("/slots", handle_slots);
     svr->Get ("/metrics", handle_metrics);
     svr->Get ("/props", handle_props);
@@ -3756,33 +3781,12 @@ int main(int argc, char ** argv) {
     svr->Post("/v1/embeddings", handle_embeddings);
     svr->Post("/tokenize", handle_tokenize);
     svr->Post("/detokenize", handle_detokenize);
+
     if (!sparams.slot_save_path.empty()) {
         // only enable slot endpoints if slot_save_path is set
         svr->Post("/slots/:id_slot", handle_slots_action);
     }

-    //
-    // Start the server
-    //
-    if (sparams.n_threads_http < 1) {
-        // +2 threads for monitoring endpoints
-        sparams.n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1);
-    }
-    log_data["n_threads_http"] = std::to_string(sparams.n_threads_http);
-    svr->new_task_queue = [&sparams] { return new httplib::ThreadPool(sparams.n_threads_http); };
-
-    LOG_INFO("HTTP server listening", log_data);
-
-    // run the HTTP server in a thread - see comment below
-    std::thread t([&]() {
-        if (!svr->listen_after_bind()) {
-            state.store(SERVER_STATE_ERROR);
-            return 1;
-        }
-
-        return 0;
-    });
-
     ctx_server.queue_tasks.on_new_task(std::bind(
         &server_context::process_single_task, &ctx_server, std::placeholders::_1));
     ctx_server.queue_tasks.on_finish_multitask(std::bind(

From 52a4d5974739160c2bdb5e2cd0893a15cb19f3b4 Mon Sep 17 00:00:00 2001
From: ManniX-ITA <20623405+mann1x@users.noreply.github.com>
Date: Thu, 18 Apr 2024 18:14:29 +0200
Subject: [PATCH 2/5] Moved endpoints registration before listener, plus fixes

- Moved endpoints registration before the HTTP listener starts
- Endpoints return the correct error when the model is loading or
  failed to load
- Server exits if it fails to bind the port

---
 examples/server/server.cpp | 271 +++++++++++++++++++++++--------------
 1 file changed, 166 insertions(+), 105 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 6dcdc7c8902f7..cc455d8ce9150 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2282,6 +2282,17 @@ struct server_context {
             {"size", llama_model_size (model)},
         };
     }
+
+    json empty_model_meta() const {
+        return json {
+            {"vocab_type", llama_vocab_type (0)},
+            {"n_vocab", llama_n_vocab (0)},
+            {"n_ctx_train", llama_n_ctx_train (0)},
+            {"n_embd", llama_n_embd (0)},
+            {"n_params", llama_model_n_params(0)},
+            {"size", llama_model_size (0)},
+        };
+    }
 };

 static void server_print_usage(const char * argv0, const gpt_params & params, const server_params & sparams) {
@@ -3062,92 +3073,6 @@ int main(int argc, char ** argv) {
         }
     };

-    // register Health API routes
-    svr->Get ("/health", handle_health);
-
-    auto handle_static_file = [](unsigned char * content, size_t len, const char * mime_type) {
-        return [content, len, mime_type](const httplib::Request &, httplib::Response & res) {
-            res.set_content(reinterpret_cast<const char *>(content), len, mime_type);
-            return false;
-        };
-    };
-
-    //
-    // Router
-    //
-
-    // register static assets routes
-    if (!sparams.public_path.empty()) {
-        // Set the base directory for serving static files
-        svr->set_base_dir(sparams.public_path);
-    }
-
-    // using embedded static files
-    svr->Get("/", handle_static_file(index_html, index_html_len, "text/html; charset=utf-8"));
-    svr->Get("/index.js", handle_static_file(index_js, index_js_len, "text/javascript; charset=utf-8"));
-    svr->Get("/completion.js", handle_static_file(completion_js, completion_js_len, "text/javascript; charset=utf-8"));
-    svr->Get("/json-schema-to-grammar.mjs", handle_static_file(
-        json_schema_to_grammar_mjs, json_schema_to_grammar_mjs_len, "text/javascript; charset=utf-8"));
-
-    //
-    // Start the server
-    //
-    if (sparams.n_threads_http < 1) {
-        // +2 threads for monitoring endpoints
-        sparams.n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1);
-    }
-    log_data["n_threads_http"] = std::to_string(sparams.n_threads_http);
-    svr->new_task_queue = [&sparams] { return new httplib::ThreadPool(sparams.n_threads_http); };
-
-    LOG_INFO("HTTP server listening", log_data);
-
-    // run the HTTP server in a thread - see comment below
-    std::thread t([&]() {
-        if (!svr->listen_after_bind()) {
-            state.store(SERVER_STATE_ERROR);
-            return 1;
-        }
-
-        return 0;
-    });
-
-    // load the model
-    if (!ctx_server.load_model(params)) {
-        state.store(SERVER_STATE_ERROR);
-        return 1;
-    } else {
-        ctx_server.init();
-        state.store(SERVER_STATE_READY);
-    }
-
-    LOG_INFO("model loaded", {});
-
-    const auto model_meta = ctx_server.model_meta();
-
-    // if a custom chat template is not supplied, we will use the one that comes with the model (if any)
-    if (sparams.chat_template.empty()) {
-        if (!ctx_server.validate_model_chat_template()) {
-            LOG_ERROR("The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
-            sparams.chat_template = "chatml";
-        }
-    }
-
-    // print sample chat example to make it clear which template is used
-    {
-        json chat;
-        chat.push_back({{"role", "system"}, {"content", "You are a helpful assistant"}});
-        chat.push_back({{"role", "user"}, {"content", "Hello"}});
-        chat.push_back({{"role", "assistant"}, {"content", "Hi there"}});
-        chat.push_back({{"role", "user"}, {"content", "How are you?"}});
-
-        const std::string chat_example = format_chat(ctx_server.model, sparams.chat_template, chat);
-
-        LOG_INFO("chat template", {
-            {"chat_example", chat_example},
-            {"built_in", sparams.chat_template.empty()},
-        });
-    }
-
     //
     // Middlewares
     //
@@ -3208,6 +3133,52 @@ int main(int argc, char ** argv) {
         return httplib::Server::HandlerResponse::Unhandled;
     });

+    auto middleware_model_loading = [&sparams, &res_error](const httplib::Request & req, httplib::Response & res, server_state current_state) {
+        static const std::set<std::string> invalid_endpoints = {
+            "/slots",
+            "/metrics",
+            "/props",
+            "/v1/models",
+            "/completion",
+            "/completions",
+            "/v1/completions",
+            "/chat/completions",
+            "/v1/chat/completions",
+            "/infill",
+            "/tokenize",
+            "/detokenize",
+            "/embedding",
+            "/embeddings",
+            "/v1/embeddings",
+        };
+
+        // If path is not in invalid_endpoints list, skip validation
+        if (invalid_endpoints.find(req.path) == invalid_endpoints.end()) {
+            return true;
+        }
+        switch (current_state) {
+            case SERVER_STATE_LOADING_MODEL:
+                {
+                    res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
+                    return false;
+                } break;
+            case SERVER_STATE_ERROR:
+                {
+                    res_error(res, format_error_response("Model failed to load", ERROR_TYPE_SERVER));
+                    return false;
+                } break;
+        }
+        return true;
+    };
+
+    svr->set_pre_routing_handler([&state, &middleware_model_loading](const httplib::Request & req, httplib::Response & res) {
+        server_state current_state = state.load();
+        if (!middleware_model_loading(req, res, current_state)) {
+            return httplib::Server::HandlerResponse::Handled;
+        }
+        return httplib::Server::HandlerResponse::Unhandled;
+    });
+
     //
     // Route handlers (or controllers)
     //
@@ -3531,25 +3502,6 @@ int main(int argc, char ** argv) {
         }
     };

-    const auto handle_models = [&params, &model_meta](const httplib::Request & req, httplib::Response & res) {
-        res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
-
-        json models = {
-            {"object", "list"},
-            {"data", {
-                {
-                    {"id", params.model_alias},
-                    {"object", "model"},
-                    {"created", std::time(0)},
-                    {"owned_by", "llamacpp"},
-                    {"meta", model_meta}
-                },
-            }}
-        };
-
-        res.set_content(models.dump(), "application/json; charset=utf-8");
-    };
-
     const auto handle_chat_completions = [&ctx_server, &sparams, &res_error](const httplib::Request & req, httplib::Response & res) {
         res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
         json data = oaicompat_completion_params_parse(ctx_server.model, json::parse(req.body), sparams.chat_template);
@@ -3763,9 +3715,30 @@ int main(int argc, char ** argv) {
             : responses[0];
         return res.set_content(root.dump(), "application/json; charset=utf-8");
     };
+
+    const auto handle_models = [&](const httplib::Request & req, httplib::Response & res) {
+        res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
+        json model_meta = ctx_server.model_meta();
+
+        json models = {
+            {"object", "list"},
+            {"data", {
+                {
+                    {"id", params.model_alias},
+                    {"object", "model"},
+                    {"created", std::time(0)},
+                    {"owned_by", "llamacpp"},
+                    {"meta", model_meta}
+                },
+            }}
+        };
+
+        res.set_content(models.dump(), "application/json; charset=utf-8");
+    };

     // register API routes
+    svr->Get ("/health", handle_health);
     svr->Get ("/slots", handle_slots);
     svr->Get ("/metrics", handle_metrics);
     svr->Get ("/props", handle_props);
@@ -3787,6 +3760,94 @@ int main(int argc, char ** argv) {
         svr->Post("/slots/:id_slot", handle_slots_action);
     }

+    auto handle_static_file = [](unsigned char * content, size_t len, const char * mime_type) {
+        return [content, len, mime_type](const httplib::Request &, httplib::Response & res) {
+            res.set_content(reinterpret_cast<const char *>(content), len, mime_type);
+            return false;
+        };
+    };
+
+    //
+    // Router
+    //
+
+    // register static assets routes
+    if (!sparams.public_path.empty()) {
+        // Set the base directory for serving static files
+        svr->set_base_dir(sparams.public_path);
+    }
+
+    // using embedded static files
+    svr->Get("/", handle_static_file(index_html, index_html_len, "text/html; charset=utf-8"));
+    svr->Get("/index.js", handle_static_file(index_js, index_js_len, "text/javascript; charset=utf-8"));
+    svr->Get("/completion.js", handle_static_file(completion_js, completion_js_len, "text/javascript; charset=utf-8"));
+    svr->Get("/json-schema-to-grammar.mjs", handle_static_file(
+        json_schema_to_grammar_mjs, json_schema_to_grammar_mjs_len, "text/javascript; charset=utf-8"));
+
+    //
+    // Start the server
+    //
+    if (sparams.n_threads_http < 1) {
+        // +2 threads for monitoring endpoints
+        sparams.n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1);
+    }
+    log_data["n_threads_http"] = std::to_string(sparams.n_threads_http);
+    svr->new_task_queue = [&sparams] { return new httplib::ThreadPool(sparams.n_threads_http); };
+
+    LOG_INFO("HTTP server listening", log_data);
+
+    // run the HTTP server in a thread - see comment below
+    std::thread t([&]() {
+        if (!svr->listen_after_bind()) {
+            state.store(SERVER_STATE_ERROR);
+            return 1;
+        }
+
+        return 0;
+    });
+
+    if (state.load() == SERVER_STATE_ERROR) {
+        // HTTP Server could not bind the port
+        return 1;
+    }
+
+    // load the model
+    if (!ctx_server.load_model(params)) {
+        state.store(SERVER_STATE_ERROR);
+        return 1;
+    } else {
+        ctx_server.init();
+        state.store(SERVER_STATE_READY);
+    }
+
+    LOG_INFO("model loaded", {});
+
+    const auto model_meta = ctx_server.model_meta();
+
+    // if a custom chat template is not supplied, we will use the one that comes with the model (if any)
+    if (sparams.chat_template.empty()) {
+        if (!ctx_server.validate_model_chat_template()) {
+            LOG_ERROR("The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
+            sparams.chat_template = "chatml";
+        }
+    }
+
+    // print sample chat example to make it clear which template is used
+    {
+        json chat;
+        chat.push_back({{"role", "system"}, {"content", "You are a helpful assistant"}});
+        chat.push_back({{"role", "user"}, {"content", "Hello"}});
+        chat.push_back({{"role", "assistant"}, {"content", "Hi there"}});
+        chat.push_back({{"role", "user"}, {"content", "How are you?"}});
+
+        const std::string chat_example = format_chat(ctx_server.model, sparams.chat_template, chat);
+
+        LOG_INFO("chat template", {
+            {"chat_example", chat_example},
+            {"built_in", sparams.chat_template.empty()},
+        });
+    }
+
     ctx_server.queue_tasks.on_new_task(std::bind(
         &server_context::process_single_task, &ctx_server, std::placeholders::_1));
     ctx_server.queue_tasks.on_finish_multitask(std::bind(

From b9613ef11a748ba3b8961fd0504aca89134d705e Mon Sep 17 00:00:00 2001
From: ManniX-ITA <20623405+mann1x@users.noreply.github.com>
Date: Thu, 18 Apr 2024 22:14:33 +0200
Subject: [PATCH 3/5] Removed leftover

---
 examples/server/server.cpp | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index cc455d8ce9150..2cc8025fe149e 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2282,17 +2282,6 @@ struct server_context {
             {"size", llama_model_size (model)},
         };
     }
-
-    json empty_model_meta() const {
-        return json {
-            {"vocab_type", llama_vocab_type (0)},
-            {"n_vocab", llama_n_vocab (0)},
-            {"n_ctx_train", llama_n_ctx_train (0)},
-            {"n_embd", llama_n_embd (0)},
-            {"n_params", llama_model_n_params(0)},
-            {"size", llama_model_size (0)},
-        };
-    }
 };

 static void server_print_usage(const char * argv0, const gpt_params & params, const server_params & sparams) {

From 61b483d3a8482334442a83a29164f7633bac5c3e Mon Sep 17 00:00:00 2001
From: ManniX-ITA <20623405+mann1x@users.noreply.github.com>
Date: Fri, 19 Apr 2024 19:03:23 +0200
Subject: [PATCH 4/5] Update server.cpp after code review

https://github.com/ggerganov/llama.cpp/pull/6739/files/b9613ef11a748ba3b8961fd0504aca89134d705e

---
 examples/server/server.cpp | 53 +++++++------------------------------
 1 file changed, 10 insertions(+), 43 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 2cc8025fe149e..4286682d4b648 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2282,6 +2282,7 @@ struct server_context {
             {"size", llama_model_size (model)},
         };
     }
+
 };

 static void server_print_usage(const char * argv0, const gpt_params & params, const server_params & sparams) {
@@ -3114,37 +3115,13 @@ int main(int argc, char ** argv) {
         return false;
     };

-    // register server middlewares
-    svr->set_pre_routing_handler([&middleware_validate_api_key](const httplib::Request & req, httplib::Response & res) {
-        if (!middleware_validate_api_key(req, res)) {
-            return httplib::Server::HandlerResponse::Handled;
-        }
-        return httplib::Server::HandlerResponse::Unhandled;
-    });
-
     auto middleware_model_loading = [&sparams, &res_error](const httplib::Request & req, httplib::Response & res, server_state current_state) {
-        static const std::set<std::string> invalid_endpoints = {
-            "/slots",
-            "/metrics",
-            "/props",
-            "/v1/models",
-            "/completion",
-            "/completions",
-            "/v1/completions",
-            "/chat/completions",
-            "/v1/chat/completions",
-            "/infill",
-            "/tokenize",
-            "/detokenize",
-            "/embedding",
-            "/embeddings",
-            "/v1/embeddings",
-        };
-
-        // If path is not in invalid_endpoints list, skip validation
-        if (invalid_endpoints.find(req.path) == invalid_endpoints.end()) {
+        // Skip validation if the path is a health check
+        if (req.path == "/health" || req.path == "/v1/health") {
             return true;
         }
+
         switch (current_state) {
             case SERVER_STATE_LOADING_MODEL:
                 {
@@ -3160,9 +3137,10 @@ int main(int argc, char ** argv) {
         return true;
     };

-    svr->set_pre_routing_handler([&state, &middleware_model_loading](const httplib::Request & req, httplib::Response & res) {
+    // register server middlewares
+    svr->set_pre_routing_handler([&middleware_validate_api_key, &state, &middleware_model_loading](const httplib::Request & req, httplib::Response & res) {
         server_state current_state = state.load();
-        if (!middleware_model_loading(req, res, current_state)) {
+        if (!middleware_model_loading(req, res, current_state) || !middleware_validate_api_key(req, res)) {
             return httplib::Server::HandlerResponse::Handled;
         }
         return httplib::Server::HandlerResponse::Unhandled;
     });
@@ -3705,7 +3683,7 @@ int main(int argc, char ** argv) {
         return res.set_content(root.dump(), "application/json; charset=utf-8");
     };

-    const auto handle_models = [&](const httplib::Request & req, httplib::Response & res) {
+    const auto handle_models = [&params, &ctx_server](const httplib::Request & req, httplib::Response & res) {
         res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
         json model_meta = ctx_server.model_meta();

@@ -3728,6 +3706,7 @@ int main(int argc, char ** argv) {

     // register API routes
     svr->Get ("/health", handle_health);
+    svr->Get ("/v1/health", handle_health);
     svr->Get ("/slots", handle_slots);
     svr->Get ("/metrics", handle_metrics);
     svr->Get ("/props", handle_props);
@@ -3756,11 +3735,6 @@ int main(int argc, char ** argv) {
         };
     };

-    //
-    // Router
-    //
-
-    // register static assets routes
     if (!sparams.public_path.empty()) {
         // Set the base directory for serving static files
         svr->set_base_dir(sparams.public_path);
     }
@@ -3794,12 +3768,7 @@ int main(int argc, char ** argv) {
         return 0;
     });
-
-    if (state.load() == SERVER_STATE_ERROR) {
-        // HTTP Server could not bind the port
-        return 1;
-    }
-
+
     // load the model
     if (!ctx_server.load_model(params)) {
         state.store(SERVER_STATE_ERROR);
@@ -3811,8 +3780,6 @@ int main(int argc, char ** argv) {

     LOG_INFO("model loaded", {});

-    const auto model_meta = ctx_server.model_meta();
-
     // if a custom chat template is not supplied, we will use the one that comes with the model (if any)
     if (sparams.chat_template.empty()) {
         if (!ctx_server.validate_model_chat_template()) {

From 942f023930ee7b5877034de52fabd3e67aed3589 Mon Sep 17 00:00:00 2001
From: ManniX-ITA <20623405+mann1x@users.noreply.github.com>
Date: Sat, 20 Apr 2024 09:05:57 +0200
Subject: [PATCH 5/5] Fixes unhandled ready state with a default switch case

Co-authored-by: Xuan Son Nguyen
---
 examples/server/server.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 4286682d4b648..a19d4babd9acb 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -3133,6 +3133,8 @@ int main(int argc, char ** argv) {
                     res_error(res, format_error_response("Model failed to load", ERROR_TYPE_SERVER));
                     return false;
                 } break;
+            default:
+                break;
         }
         return true;
     };
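
A note on exercising the new startup sequence end to end: with the listener
started before the model loads, a client can poll /health and tell "still
loading" apart from "ready" and "failed". The sketch below is a minimal
standalone probe using cpp-httplib, the same library the server embeds. The
host, port, and poll interval are placeholder assumptions, and the 503/500
status codes follow the ERROR_TYPE_UNAVAILABLE/ERROR_TYPE_SERVER mapping
used by res_error in the patches above; adjust to your setup.

#include <chrono>
#include <iostream>
#include <thread>

#include "httplib.h"

int main() {
    httplib::Client cli("localhost", 8080); // assumed server host/port

    for (;;) {
        auto res = cli.Get("/health");
        if (!res) {
            // no listener yet; before these patches this was the only signal
            // clients got while the model was loading
            std::cout << "server not reachable, retrying\n";
        } else if (res->status == 503) {
            // listener is up, model still loading ("Loading model")
            std::cout << "loading: " << res->body << "\n";
        } else if (res->status == 200) {
            // SERVER_STATE_READY: slot counts are included in the body
            std::cout << "ready: " << res->body << "\n";
            return 0;
        } else {
            // e.g. 500 when the model failed to load
            std::cout << "error " << res->status << ": " << res->body << "\n";
            return 1;
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(250));
    }
}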