@@ -119,6 +119,26 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         from vllm.config import CompilationLevel  # noqa: E402
         compilation_config = vllm_config.compilation_config
         model_config = vllm_config.model_config
+        additional_config = vllm_config.additional_config
+        parallel_config = vllm_config.parallel_config
+        cache_config = vllm_config.cache_config
+
+        if parallel_config:
+            # Default value for expert tensor parallel size
+            parallel_config.expert_tensor_parallel_size = parallel_config.tensor_parallel_size
+
+            # NOTE: When enable_expert_parallel is True, we follow vLLM convention:
+            # ep_size = world_size, which means expert_tensor_parallel_size must be 1
+            if (additional_config
+                    and "expert_tensor_parallel_size" in additional_config
+                    and not parallel_config.enable_expert_parallel):
+                parallel_config.expert_tensor_parallel_size = int(
+                    additional_config["expert_tensor_parallel_size"])
+
+            # Calculate expert parallel size based on world size
+            parallel_config.expert_parallel_size = (
+                parallel_config.world_size //
+                parallel_config.expert_tensor_parallel_size)

         if model_config is None:
             logger.warning("Model config is missing. This may indicate "
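For orientation, the sizing rule this hunk introduces reduces to a small piece of arithmetic: expert_tensor_parallel_size defaults to tensor_parallel_size, a user override from additional_config is honored only while enable_expert_parallel is off, and expert_parallel_size is whatever remains of world_size after dividing out the expert tensor parallelism. A minimal standalone sketch of just that logic (the helper name and plain-dict config are illustrative, not part of the patch):

    # Sketch of the expert-parallel sizing above; mirrors the hunk's logic only.
    def resolve_expert_sizes(world_size, tensor_parallel_size,
                             enable_expert_parallel, additional_config=None):
        # Default: expert tensor parallelism mirrors tensor parallelism.
        etp_size = tensor_parallel_size
        # An explicit override is honored only while expert parallelism is off.
        if (additional_config
                and "expert_tensor_parallel_size" in additional_config
                and not enable_expert_parallel):
            etp_size = int(additional_config["expert_tensor_parallel_size"])
        # Expert parallel size is derived from the world size.
        ep_size = world_size // etp_size
        return etp_size, ep_size

    # e.g. 8 NPUs, tp=8, override etp=2 with EP off -> etp=2, ep=4
    assert resolve_expert_sizes(8, 8, False,
                                {"expert_tensor_parallel_size": 2}) == (2, 4)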
@@ -127,9 +147,9 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         else:
             enforce_eager = getattr(model_config, "enforce_eager", False)

-        if vllm_config.additional_config is not None:
-            enable_graph_mode = vllm_config.additional_config.get(
-                "enable_graph_mode", False)
+        if additional_config is not None:
+            enable_graph_mode = additional_config.get("enable_graph_mode",
+                                                      False)
             if enable_graph_mode:
                 if enforce_eager:
                     raise RuntimeError(
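As background for the additional_config reads in this hunk: these options reach the platform through vLLM's additional_config engine argument. A hypothetical invocation, assuming a vLLM build that exposes additional_config on LLM (model name and values illustrative):

    # Hypothetical usage; the keys mirror the lookups in this file.
    from vllm import LLM

    llm = LLM(model="deepseek-ai/DeepSeek-V2-Lite",
              additional_config={"enable_graph_mode": True,
                                 "expert_tensor_parallel_size": 1})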
@@ -139,7 +159,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
                     logger.warning(
                         "NPU graph mode is still experimental and not supported for V1 without mla currently, "
                         "it has been disabled automatically.")
-                    vllm_config.additional_config["enable_graph_mode"] = False
+                    additional_config["enable_graph_mode"] = False
                 if model_config:
                     model_type = model_config.hf_config.model_type
                     if "deepseek" not in model_type:
@@ -178,7 +198,6 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
                 ["vllm.unified_ascend_attention_with_output"])
             update_aclgraph_sizes(vllm_config)

-        parallel_config = vllm_config.parallel_config
         if parallel_config and parallel_config.worker_cls == "auto":
             if envs.VLLM_USE_V1:
                 parallel_config.worker_cls = "vllm_ascend.worker.worker_v1.NPUWorker"
@@ -190,7 +209,6 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
             else:
                 parallel_config.worker_cls = "vllm_ascend.worker.worker.NPUWorker"

-        cache_config = vllm_config.cache_config
         if cache_config:
             if cache_config.block_size is None:
                 cache_config.block_size = 128
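The worker selection spanning this hunk and the previous one is a plain string dispatch on envs.VLLM_USE_V1; in isolation it amounts to the following (the environment read is a stand-in for vLLM's envs module, which resolves the flag itself):

    import os

    # Stand-in for envs.VLLM_USE_V1; class paths copied from the hunks above.
    use_v1 = os.environ.get("VLLM_USE_V1", "0") == "1"
    worker_cls = ("vllm_ascend.worker.worker_v1.NPUWorker" if use_v1
                  else "vllm_ascend.worker.worker.NPUWorker")

The cache portion of the hunk only fills an unset cache_config.block_size with the platform default of 128.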
@@ -202,11 +220,10 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:

         if envs.VLLM_USE_V1:
             # Activate custom ops for v1.
-            vllm_config.compilation_config.custom_ops = ["all"]
+            compilation_config.custom_ops = ["all"]
             # If ascend_scheduler_config exists in additional_config,
             # extends original scheduler_config to use AscendScheduler.

-            additional_config = vllm_config.additional_config
             if additional_config and additional_config.get(
                     "ascend_scheduler_config", None) is not None:
                 additional_scheduler_config = additional_config.get(
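The final hunk is cut off mid-statement by the diff context; for orientation, the lookup it re-homes is just a presence check followed by a fetch. A sketch of only what the hunk shows, with an illustrative config shape (the diff does not reveal the schema of the value):

    # Illustrative: the diff shows the lookup, not the schema of the value.
    additional_config = {"ascend_scheduler_config": {}}   # presence opts in
    if additional_config and additional_config.get(
            "ascend_scheduler_config", None) is not None:
        additional_scheduler_config = additional_config.get(
            "ascend_scheduler_config")
        # ...scheduler_config is then extended to use AscendScheduler...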