Skip to content

Commit c0a8c6d

Browse files
authored
server : health endpoint configurable failure on no slot (#5594)
1 parent b9111bd commit c0a8c6d

File tree

2 files changed

+31
-30
lines changed

2 files changed

+31
-30
lines changed

examples/server/README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,10 +134,11 @@ node index.js
134134
## API Endpoints
135135

136136
- **GET** `/health`: Returns the current state of the server:
137-
- `{"status": "loading model"}` if the model is still being loaded.
138-
- `{"status": "error"}` if the model failed to load.
139-
- `{"status": "ok"}` if the model is successfully loaded and the server is ready for further requests mentioned below.
140-
- `{"status": "no slot available", "slots_idle": 0, "slots_processing": 32}` if no slot are currently available
137+
- 503 -> `{"status": "loading model"}` if the model is still being loaded.
138+
- 500 -> `{"status": "error"}` if the model failed to load.
139+
- 200 -> `{"status": "ok", "slots_idle": 1, "slots_processing": 2}` if the model is successfully loaded and the server is ready for further requests mentioned below.
140+
- 200 -> `{"status": "no slot available", "slots_idle": 0, "slots_processing": 32}` if no slots are currently available.
141+
- 503 -> `{"status": "no slot available", "slots_idle": 0, "slots_processing": 32}` if the query parameter `fail_on_no_slot` is provided and no slots are currently available.
141142

142143
- **POST** `/completion`: Given a `prompt`, it returns the predicted completion.
143144

examples/server/server.cpp

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2582,40 +2582,40 @@ int main(int argc, char **argv)
25822582
res.set_header("Access-Control-Allow-Headers", "*");
25832583
});
25842584

2585-
svr.Get("/health", [&](const httplib::Request&, httplib::Response& res) {
2585+
svr.Get("/health", [&](const httplib::Request& req, httplib::Response& res) {
25862586
server_state current_state = state.load();
25872587
switch(current_state) {
2588-
case SERVER_STATE_READY:
2589-
if (llama.all_slots_are_idle) {
2590-
res.set_content(R"({"status": "ok"})", "application/json");
2588+
case SERVER_STATE_READY: {
2589+
int available_slots = 0;
2590+
int processing_slots = 0;
2591+
for (llama_client_slot &slot: llama.slots) {
2592+
if (slot.available()) {
2593+
available_slots++;
2594+
} else {
2595+
processing_slots++;
2596+
}
2597+
}
2598+
if (available_slots > 0) {
2599+
json health = {
2600+
{"status", "ok"},
2601+
{"slots_idle", available_slots},
2602+
{"slots_processing", processing_slots}};
2603+
res.set_content(health.dump(), "application/json");
25912604
res.status = 200; // HTTP OK
25922605
} else {
2593-
int available_slots = 0;
2594-
int processing_slots = 0;
2595-
for (llama_client_slot & slot : llama.slots) {
2596-
if (slot.available()) {
2597-
available_slots++;
2598-
} else {
2599-
processing_slots++;
2600-
}
2601-
}
2602-
if (available_slots > 0) {
2603-
json health = {
2604-
{"status", "ok"},
2605-
{"slots_idle", available_slots},
2606-
{"slots_processing", processing_slots}};
2607-
res.set_content(health.dump(), "application/json");
2608-
res.status = 200; // HTTP OK
2609-
} else {
2610-
json health = {
2611-
{"status", "no slot available"},
2612-
{"slots_idle", available_slots},
2613-
{"slots_processing", processing_slots}};
2614-
res.set_content(health.dump(), "application/json");
2606+
json health = {
2607+
{"status", "no slot available"},
2608+
{"slots_idle", available_slots},
2609+
{"slots_processing", processing_slots}};
2610+
res.set_content(health.dump(), "application/json");
2611+
if (req.has_param("fail_on_no_slot")) {
26152612
res.status = 503; // HTTP Service Unavailable
2613+
} else {
2614+
res.status = 200; // HTTP OK
26162615
}
26172616
}
26182617
break;
2618+
}
26192619
case SERVER_STATE_LOADING_MODEL:
26202620
res.set_content(R"({"status": "loading model"})", "application/json");
26212621
res.status = 503; // HTTP Service Unavailable

0 commit comments

Comments
 (0)