Commit d07f737

Merge branch 'develop' into mm_structred_output

2 parents: 6aaed45 + 2c36074

File tree: 9 files changed, 215 additions & 337 deletions

dockerfiles/Dockerfile.xpu

Lines changed: 6 additions & 2 deletions
@@ -2,6 +2,9 @@ FROM ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlenlp:llm-base-gcc12.
 
 WORKDIR /workspace
 
+ENV http_proxy=http://agent.baidu.com:8891
+ENV https_proxy=http://agent.baidu.com:8891
+
 RUN echo "\
 deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy main restricted universe multiverse \n\
 deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-updates main restricted universe multiverse \n\
@@ -12,10 +15,10 @@ RUN apt-get update && apt-get install -y libibverbs-dev librdmacm-dev cmake pybi
 # uninstall existing package
 RUN python -m pip uninstall paddlepaddle-gpu paddlepaddle-xpu -y
 # install paddlepaddle
-RUN python -m pip install --no-cache-dir --progress-bar off --pre paddlepaddle-xpu -i https://www.paddlepaddle.org.cn/packages/nightly/xpu-p800/
+RUN python -m pip install --no-cache-dir --progress-bar off paddlepaddle-xpu==3.1.0 -i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/
 
 # get xtdk and xvllm and xre
-RUN mkdir -p /workspace/deps && cd /workspace/deps && wget https://klx-sdk-release-public.su.bcebos.com/xinfer/daily/eb/20250624/output.tar.gz && \
+RUN mkdir -p /workspace/deps && cd /workspace/deps && wget https://klx-sdk-release-public.su.bcebos.com/xinfer/daily/eb/20250710/output.tar.gz && \
     tar -zxf output.tar.gz && mv output xvllm && \
     wget https://klx-sdk-release-public.su.bcebos.com/xtdk_15fusion/dev/3.2.40.1/xtdk-llvm15-ubuntu2004_x86_64.tar.gz && \
     tar -zxf xtdk-llvm15-ubuntu2004_x86_64.tar.gz && mv xtdk-llvm15-ubuntu2004_x86_64 xtdk && \
@@ -36,3 +39,4 @@ RUN cd /workspace/FastDeploy && bash build.sh && python -m pip install --no-cach
 
 ENV http_proxy=""
 ENV https_proxy=""
+ENV no_proxy=""
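For context, a minimal sketch of building this image from the repository root (the tag is a placeholder; note that the Baidu proxy and the Tsinghua mirrors above are hard-coded into the build):

    docker build -f dockerfiles/Dockerfile.xpu -t fastdeploy:xpu .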

fastdeploy/download_model.py

Lines changed: 0 additions & 227 deletions
This file was deleted.

fastdeploy/engine/engine.py

Lines changed: 0 additions & 6 deletions
@@ -142,12 +142,6 @@ def __init__(self, cfg):
             disable_any_whitespace=self.cfg.disable_any_whitespace,
         )
 
-    def reset_scheduler(self):
-        """
-        Reset the scheduler to its initial state.
-        """
-        self.scheduler.reset()
-
     def start(self, api_server_pid=None):
        """
        Initializes the engine and starts its sub-services.
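The removed wrapper had delegated to `self.scheduler.reset()`; a one-line sketch of the new call path used by the reset handler in api_server.py below:

    # Old (removed wrapper on LLMEngine): llm_engine.reset_scheduler()
    # New: go through the scheduler attribute directly.
    llm_engine.scheduler.reset_scheduler()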

fastdeploy/entrypoints/openai/api_server.py

Lines changed: 39 additions & 4 deletions
@@ -32,7 +32,8 @@
     ChatCompletionResponse,
     CompletionRequest,
     CompletionResponse,
-    ErrorResponse)
+    ErrorResponse,
+    ControlSchedulerRequest)
 from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat
 from fastdeploy.entrypoints.openai.serving_completion import \
     OpenAIServingCompletion
@@ -279,7 +280,7 @@ def launch_api_server() -> None:
     """
     if not is_port_available(args.host, args.port):
         raise Exception(f"The parameter `port`:{args.port} is already in use.")
-
+
     api_server_logger.info(
         f"launch Fastdeploy api server... port: {args.port}")
     api_server_logger.info(f"args: {args.__dict__}")
@@ -326,7 +327,7 @@ def launch_metrics_server():
         raise Exception(
             f"The parameter `metrics_port`:{args.metrics_port} is already in use."
         )
-
+
     prom_dir = cleanup_prometheus_files(True)
     os.environ["PROMETHEUS_MULTIPROC_DIR"] = prom_dir
     metrics_server_thread = threading.Thread(target=run_metrics_server,
@@ -347,10 +348,39 @@ def reset_scheduler():
 
     if llm_engine is None:
         return Response("Engine not loaded", status_code=500)
-    llm_engine.reset_scheduler()
+    llm_engine.scheduler.reset_scheduler()
     return Response("Scheduler Reset Successfully", status_code=200)
 
 
+@controller_app.post("/controller/scheduler")
+def control_scheduler(request: ControlSchedulerRequest):
+    """
+    Control the scheduler behavior with the given parameters.
+    """
+    content = ErrorResponse(object="", message="Scheduler updated successfully", code=0)
+
+    global llm_engine
+    if llm_engine is None:
+        content.message = "Engine is not loaded"
+        content.code = 500
+        return JSONResponse(content=content.model_dump(), status_code=500)
+
+    if request.reset:
+        llm_engine.scheduler.reset_scheduler()
+
+    if request.load_shards_num or request.reallocate_shard:
+        if hasattr(llm_engine.scheduler, "update_config") and callable(llm_engine.scheduler.update_config):
+            llm_engine.scheduler.update_config(
+                load_shards_num=request.load_shards_num,
+                reallocate=request.reallocate_shard)
+        else:
+            content.message = "This scheduler doesn't support the `update_config()` method."
+            content.code = 400
+            return JSONResponse(content=content.model_dump(), status_code=400)
+
+    return JSONResponse(content=content.model_dump(), status_code=200)
+
+
 def run_controller_server():
     """
     run controller server
@@ -371,6 +401,11 @@ def launch_controller_server():
             f"The parameter `controller_port`:{args.controller_port} is already in use."
         )
 
+    if not is_port_available(args.host, args.controller_port):
+        raise Exception(
+            f"The parameter `controller_port`:{args.controller_port} is already in use."
+        )
+
     controller_server_thread = threading.Thread(target=run_controller_server,
                                                 daemon=True)
     controller_server_thread.start()
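Taken together with the protocol change below, the new endpoint accepts a JSON body matching ControlSchedulerRequest. A minimal sketch of exercising it, assuming the controller server is reachable on localhost and using a placeholder controller port of 9000:

    import requests  # third-party HTTP client, assumed available

    # Reset the scheduler and request a shard reconfiguration in one call.
    # Host and port are placeholders; use your own host/controller_port settings.
    resp = requests.post(
        "http://localhost:9000/controller/scheduler",
        json={"reset": True, "load_shards_num": 2, "reallocate_shard": False},
    )
    print(resp.status_code, resp.json())
    # 200 {'object': '', 'message': 'Scheduler updated successfully', 'code': 0}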

fastdeploy/entrypoints/openai/protocol.py

Lines changed: 9 additions & 0 deletions
@@ -542,3 +542,12 @@ def check_logprobs(cls, data):
         )
 
         return data
+
+
+class ControlSchedulerRequest(BaseModel):
+    """
+    Control scheduler request to the engine.
+    """
+    reset: Optional[bool] = False
+    load_shards_num: Optional[int] = None
+    reallocate_shard: Optional[bool] = False
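The new request model is plain pydantic; a minimal sketch of how a client payload maps onto it (pydantic v2's `model_dump()` is assumed, matching the `model_dump()` calls in api_server.py above):

    from fastdeploy.entrypoints.openai.protocol import ControlSchedulerRequest

    # Omitted fields keep their declared defaults.
    req = ControlSchedulerRequest(load_shards_num=4)
    print(req.model_dump())
    # -> {'reset': False, 'load_shards_num': 4, 'reallocate_shard': False}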

fastdeploy/rl/rollout_config.py

Lines changed: 2 additions & 0 deletions
@@ -58,6 +58,7 @@ def __init__(
         max_capture_batch_size: int = 64,
         guided_decoding_backend: str = "off",
         disable_any_whitespace: bool = True,
+        enable_logprob: bool = False,
     ):
         # Required parameters
         self.model_name_or_path = model_name_or_path
@@ -99,6 +100,7 @@ def __init__(
         self.max_capture_batch_size = max_capture_batch_size
         self.guided_decoding_backend = guided_decoding_backend
         self.disable_any_whitespace = disable_any_whitespace
+        self.enable_logprob = enable_logprob
 
     def __str__(self):
         return "\n".join(f"{k}: {v}" for k, v in self.__dict__.items())
