Skip to content

Commit 585af35

Browse files
committed
Allow DeepSeek models to enable chunked prefill on NPUs
Signed-off-by: rjg-lyh <1318825571@qq.com>
1 parent 160a560 commit 585af35

File tree

1 file changed

+15
-0
lines changed

1 file changed

+15
-0
lines changed

vllm_ascend/platform.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,18 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
105105
# RayWorkerWrapper monkey patch when setup
106106
from vllm_ascend.patch import ray_patch # noqa: F401
107107

108+
if vllm_config.additional_config and vllm_config.additional_config.get(
109+
"enable_mla_chunked_prefill", False):
110+
logger.info("MLA is enabled on NPU platform; restoring chunked "
111+
"prefill to be enabled.")
112+
from vllm.config import _DEFAULT_MAX_NUM_BATCHED_TOKENS
113+
scheduler_config = vllm_config.scheduler_config
114+
scheduler_config.enable_chunked_prefill = True
115+
scheduler_config.chunked_prefill_enabled = True
116+
if scheduler_config.num_scheduler_steps == 1:
117+
scheduler_config.max_num_batched_tokens = (
118+
_DEFAULT_MAX_NUM_BATCHED_TOKENS)
119+
108120
compilation_config = vllm_config.compilation_config
109121
if compilation_config and compilation_config.level != CompilationLevel.NO_COMPILATION:
110122
logger.warning(
@@ -152,6 +164,9 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
152164
"ascend_scheduler_config", None) is not None:
153165
additional_scheduler_config = additional_config.get(
154166
"ascend_scheduler_config")
167+
if vllm_config.scheduler_config.enable_chunked_prefill:
168+
additional_scheduler_config[
169+
"enable_chunked_prefill"] = True
155170
from vllm_ascend.core.schedule_config import \
156171
AscendSchedulerConfig
157172
ascend_scheduler_config = AscendSchedulerConfig.initialize_from_config(

0 commit comments

Comments
 (0)