@@ -32,7 +32,8 @@
     ChatCompletionResponse,
     CompletionRequest,
     CompletionResponse,
-    ErrorResponse)
+    ErrorResponse,
+    ControlSchedulerRequest)
 from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat
 from fastdeploy.entrypoints.openai.serving_completion import \
     OpenAIServingCompletion
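The `ControlSchedulerRequest` model lives in fastdeploy.entrypoints.openai.protocol and is not shown in this diff. For reference only, a minimal pydantic sketch consistent with how the handler below uses it (request.reset, request.load_shards_num, request.reallocate_shard); the authoritative field types and defaults are in the protocol module and may differ:

    from typing import Optional
    from pydantic import BaseModel

    class ControlSchedulerRequest(BaseModel):
        # Sketch inferred from usage in control_scheduler(); not the
        # authoritative definition from fastdeploy.entrypoints.openai.protocol.
        reset: Optional[bool] = False             # reset the scheduler state
        load_shards_num: Optional[int] = None     # number of shards to load
        reallocate_shard: Optional[bool] = False  # trigger shard reallocation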
@@ -279,7 +280,7 @@ def launch_api_server() -> None:
     """
     if not is_port_available(args.host, args.port):
         raise Exception(f"The parameter `port`:{args.port} is already in use.")
-
+
     api_server_logger.info(
         f"launch Fastdeploy api server... port: {args.port}")
     api_server_logger.info(f"args: {args.__dict__}")
@@ -326,7 +327,7 @@ def launch_metrics_server():
         raise Exception(
             f"The parameter `metrics_port`:{args.metrics_port} is already in use."
         )
-
+
     prom_dir = cleanup_prometheus_files(True)
     os.environ["PROMETHEUS_MULTIPROC_DIR"] = prom_dir
     metrics_server_thread = threading.Thread(target=run_metrics_server,
@@ -347,10 +348,39 @@ def reset_scheduler():
 
     if llm_engine is None:
         return Response("Engine not loaded", status_code=500)
-    llm_engine.reset_scheduler()
+    llm_engine.scheduler.reset_scheduler()
     return Response("Scheduler Reset Successfully", status_code=200)
 
 
+@controller_app.post("/controller/scheduler")
+def control_scheduler(request: ControlSchedulerRequest):
+    """
+    Control the scheduler behavior with the given parameters.
+    """
+    content = ErrorResponse(object="", message="Scheduler updated successfully", code=0)
+
+    global llm_engine
+    if llm_engine is None:
+        content.message = "Engine is not loaded"
+        content.code = 500
+        return JSONResponse(content=content.model_dump(), status_code=500)
+
+    if request.reset:
+        llm_engine.scheduler.reset_scheduler()
+
+    if request.load_shards_num or request.reallocate_shard:
+        if hasattr(llm_engine.scheduler, "update_config") and callable(llm_engine.scheduler.update_config):
+            llm_engine.scheduler.update_config(
+                load_shards_num=request.load_shards_num,
+                reallocate=request.reallocate_shard)
+        else:
+            content.message = "This scheduler doesn't support the `update_config()` method."
+            content.code = 400
+            return JSONResponse(content=content.model_dump(), status_code=400)
+
+    return JSONResponse(content=content.model_dump(), status_code=200)
+
+
 def run_controller_server():
     """
     run controller server
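For a quick smoke test of the new endpoint against a running controller server, a request along these lines should work; the host and port are placeholders (substitute whatever args.controller_port the server was launched with), and the field values are arbitrary examples:

    import requests

    # POST a ControlSchedulerRequest body to the controller app.
    resp = requests.post(
        "http://localhost:8001/controller/scheduler",  # 8001 is a placeholder port
        json={"reset": False, "load_shards_num": 2, "reallocate_shard": True},
    )
    # On success the handler echoes the ErrorResponse payload with code 0,
    # e.g. {"object": "", "message": "Scheduler updated successfully", "code": 0}
    print(resp.status_code, resp.json())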
@@ -371,6 +401,11 @@ def launch_controller_server():
             f"The parameter `controller_port`:{args.controller_port} is already in use."
         )
 
+    if not is_port_available(args.host, args.controller_port):
+        raise Exception(
+            f"The parameter `controller_port`:{args.controller_port} is already in use."
+        )
+
     controller_server_thread = threading.Thread(target=run_controller_server,
                                                 daemon=True)
     controller_server_thread.start()