@@ -105,6 +105,18 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         # RayWorkerWrapper monkey patch when setup
         from vllm_ascend.patch import ray_patch  # noqa: F401
 
+        if vllm_config.additional_config and vllm_config.additional_config.get(
+                "enable_mla_chunked_prefill", False):
+            logger.info("MLA is enabled on NPU platform; restoring chunked "
+                        "prefill to be enabled.")
+            from vllm.config import _DEFAULT_MAX_NUM_BATCHED_TOKENS
+            scheduler_config = vllm_config.scheduler_config
+            scheduler_config.enable_chunked_prefill = True
+            scheduler_config.chunked_prefill_enabled = True
+            if scheduler_config.num_scheduler_steps == 1:
+                scheduler_config.max_num_batched_tokens = (
+                    _DEFAULT_MAX_NUM_BATCHED_TOKENS)
+
         compilation_config = vllm_config.compilation_config
         if compilation_config and compilation_config.level != CompilationLevel.NO_COMPILATION:
             logger.warning(
@@ -152,6 +164,9 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
                 "ascend_scheduler_config", None) is not None:
             additional_scheduler_config = additional_config.get(
                 "ascend_scheduler_config")
+            if vllm_config.scheduler_config.enable_chunked_prefill:
+                additional_scheduler_config[
+                    "enable_chunked_prefill"] = True
             from vllm_ascend.core.schedule_config import \
                 AscendSchedulerConfig
             ascend_scheduler_config = AscendSchedulerConfig.initialize_from_config(
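For context, a minimal sketch of how the new flag might be exercised from user code. This is not part of the diff: it assumes the vLLM `LLM` entry point forwards an `additional_config` dict into `VllmConfig.additional_config` (as in the vllm-ascend additional-config docs), and the model name is illustrative only.

    from vllm import LLM, SamplingParams

    # Sketch, not the PR's code: passing enable_mla_chunked_prefill=True should
    # hit the new branch above, which force-enables chunked prefill on the
    # scheduler config for MLA models.
    llm = LLM(
        model="deepseek-ai/DeepSeek-V2-Lite",  # hypothetical MLA-based model
        additional_config={
            "enable_mla_chunked_prefill": True,
            # If ascend_scheduler_config is also supplied, the second hunk above
            # propagates enable_chunked_prefill=True into it.
            "ascend_scheduler_config": {"enabled": True},
        },
    )

    outputs = llm.generate(["Hello, NPU!"], SamplingParams(max_tokens=16))
    print(outputs[0].outputs[0].text)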