
Commit b5e4288

Global scheduler supports configuring hot updates (#2807)
* Check if the controller port is available
* Global scheduler supports configuring hot updates
* add interface: /controller/scheduler
* add interface: /controller/scheduler
1 parent abbbd0c · commit b5e4288

6 files changed: +207 −108 lines


fastdeploy/engine/engine.py

Lines changed: 0 additions & 6 deletions
@@ -142,12 +142,6 @@ def __init__(self, cfg):
             disable_any_whitespace=self.cfg.disable_any_whitespace,
         )
 
-    def reset_scheduler(self):
-        """
-        Reset the scheduler to its initial state.
-        """
-        self.scheduler.reset()
-
     def start(self, api_server_pid=None):
         """
         Initializes the engine and starts its sub-services.
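
Note: the engine-level wrapper is gone, so callers that used llm_engine.reset_scheduler() must now reach through to the scheduler itself, as api_server.py does below; the old wrapper delegated to scheduler.reset(), whereas the new call sites use scheduler.reset_scheduler(). A minimal sketch of the scheduler surface those call sites assume (the class name and method bodies are illustrative, not FastDeploy's actual implementation):

class GlobalScheduler:
    """Hypothetical sketch of the scheduler interface the API server relies on."""

    def reset_scheduler(self) -> None:
        # Drop queued requests and return the scheduler to its initial state.
        ...

    def update_config(self, load_shards_num=None, reallocate=False) -> None:
        # Hot-update shard configuration without restarting the engine.
        ...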

fastdeploy/entrypoints/openai/api_server.py

Lines changed: 39 additions & 4 deletions
@@ -32,7 +32,8 @@
                                      ChatCompletionResponse,
                                      CompletionRequest,
                                      CompletionResponse,
-                                     ErrorResponse)
+                                     ErrorResponse,
+                                     ControlSchedulerRequest)
 from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat
 from fastdeploy.entrypoints.openai.serving_completion import \
     OpenAIServingCompletion
@@ -279,7 +280,7 @@ def launch_api_server() -> None:
     """
     if not is_port_available(args.host, args.port):
         raise Exception(f"The parameter `port`:{args.port} is already in use.")
-
+
     api_server_logger.info(
         f"launch Fastdeploy api server... port: {args.port}")
     api_server_logger.info(f"args: {args.__dict__}")
@@ -326,7 +327,7 @@ def launch_metrics_server():
         raise Exception(
             f"The parameter `metrics_port`:{args.metrics_port} is already in use."
         )
-
+
     prom_dir = cleanup_prometheus_files(True)
     os.environ["PROMETHEUS_MULTIPROC_DIR"] = prom_dir
     metrics_server_thread = threading.Thread(target=run_metrics_server,
@@ -347,10 +348,39 @@ def reset_scheduler():
 
     if llm_engine is None:
         return Response("Engine not loaded", status_code=500)
-    llm_engine.reset_scheduler()
+    llm_engine.scheduler.reset_scheduler()
     return Response("Scheduler Reset Successfully", status_code=200)
 
 
+@controller_app.post("/controller/scheduler")
+def control_scheduler(request: ControlSchedulerRequest):
+    """
+    Control the scheduler behavior with the given parameters.
+    """
+    content = ErrorResponse(object="", message="Scheduler updated successfully", code=0)
+
+    global llm_engine
+    if llm_engine is None:
+        content.message = "Engine is not loaded"
+        content.code = 500
+        return JSONResponse(content=content.model_dump(), status_code=500)
+
+    if request.reset:
+        llm_engine.scheduler.reset_scheduler()
+
+    if request.load_shards_num or request.reallocate_shard:
+        if hasattr(llm_engine.scheduler, "update_config") and callable(llm_engine.scheduler.update_config):
+            llm_engine.scheduler.update_config(
+                load_shards_num=request.load_shards_num,
+                reallocate=request.reallocate_shard)
+        else:
+            content.message = "This scheduler doesn't support the `update_config()` method."
+            content.code = 400
+            return JSONResponse(content=content.model_dump(), status_code=400)
+
+    return JSONResponse(content=content.model_dump(), status_code=200)
+
+
 def run_controller_server():
     """
     run controller server
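
With the controller server running, the new endpoint accepts a JSON body matching ControlSchedulerRequest. A minimal client sketch using only the standard library; the host and port below are placeholders for the deployment's actual --host and --controller_port:

import json
import urllib.request

# Placeholder address: substitute your controller server's host and port.
url = "http://127.0.0.1:8890/controller/scheduler"
body = json.dumps({"reset": True, "load_shards_num": 4, "reallocate_shard": True}).encode("utf-8")
req = urllib.request.Request(url, data=body,
                             headers={"Content-Type": "application/json"},
                             method="POST")
with urllib.request.urlopen(req) as resp:
    # On success the server answers 200 with code 0 and a confirmation message.
    print(resp.status, json.loads(resp.read()))

A 400 or 500 response (engine not loaded, or a scheduler without update_config) surfaces as urllib.error.HTTPError; its body carries the same ErrorResponse JSON.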
@@ -371,6 +401,11 @@ def launch_controller_server():
             f"The parameter `controller_port`:{args.controller_port} is already in use."
         )
 
+    if not is_port_available(args.host, args.controller_port):
+        raise Exception(
+            f"The parameter `controller_port`:{args.controller_port} is already in use."
+        )
+
     controller_server_thread = threading.Thread(target=run_controller_server,
                                                 daemon=True)
     controller_server_thread.start()
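
The launchers above gate on is_port_available before spawning their server threads. The helper's definition is not part of this diff; a typical implementation simply attempts to bind the address, roughly:

import socket

def is_port_available(host: str, port: int) -> bool:
    """Return True if (host, port) can be bound, i.e. no listener occupies it."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        try:
            sock.bind((host, port))
            return True
        except OSError:
            return False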

fastdeploy/entrypoints/openai/protocol.py

Lines changed: 9 additions & 0 deletions
@@ -542,3 +542,12 @@ def check_logprobs(cls, data):
         )
 
     return data
+
+
+class ControlSchedulerRequest(BaseModel):
+    """
+    Control scheduler request to the engine.
+    """
+    reset: Optional[bool] = False
+    load_shards_num: Optional[int] = None
+    reallocate_shard: Optional[bool] = False
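
Every field of the new model has a default, so an empty request body is valid and amounts to a no-op. A quick demonstration (pydantic v2, as the model_dump() calls in api_server.py imply):

from typing import Optional
from pydantic import BaseModel

class ControlSchedulerRequest(BaseModel):
    """Control scheduler request to the engine."""
    reset: Optional[bool] = False
    load_shards_num: Optional[int] = None
    reallocate_shard: Optional[bool] = False

print(ControlSchedulerRequest().model_dump())
# {'reset': False, 'load_shards_num': None, 'reallocate_shard': False}
print(ControlSchedulerRequest(load_shards_num=4).model_dump())
# {'reset': False, 'load_shards_num': 4, 'reallocate_shard': False}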
