
Commit 03bb288

[BugFix] Disaggregated prefill: force producer dummy run with prefill (#1552)
### What this PR does / why we need it?
In disaggregated prefill, force the KV producer's dummy run to take the prefill path.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

Signed-off-by: liziyu <liziyu16@huawei.com>
1 parent 2eace07 commit 03bb288

1 file changed: +9 -0 lines changed

vllm_ascend/worker/model_runner_v1.py

Lines changed: 9 additions & 0 deletions
@@ -372,6 +372,11 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
         # NOTE: we need to use `in_profile_run` to determine whether `enable_force_load_balance` is True
         self.in_profile_run = False
 
+        # kv role
+        self.is_kv_producer = False
+        if vllm_config.kv_transfer_config is not None:
+            self.is_kv_producer = vllm_config.kv_transfer_config.is_kv_producer
+
     def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
         """Update the cached states and the persistent batch with the scheduler
         output.
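
For reference, the flag added above is just a cached view of the engine's KV-transfer role. Below is a minimal sketch of the same derivation, assuming simplified stand-in config classes (`KVTransferConfigSketch` and `VllmConfigSketch` are illustrative, not the real vLLM types):

```python
# Minimal sketch of deriving the kv role flag; the config classes here are
# simplified stand-ins, not the real vLLM VllmConfig/KVTransferConfig.
from dataclasses import dataclass
from typing import Optional


@dataclass
class KVTransferConfigSketch:
    # A "kv_producer" node runs prefill and ships KV caches to consumers;
    # a "kv_both" node acts as both producer and consumer.
    kv_role: str = "kv_both"

    @property
    def is_kv_producer(self) -> bool:
        return self.kv_role in ("kv_producer", "kv_both")


@dataclass
class VllmConfigSketch:
    kv_transfer_config: Optional[KVTransferConfigSketch] = None


def detect_kv_producer(cfg: VllmConfigSketch) -> bool:
    # Mirrors the __init__ change above: default to False, and only read
    # the flag when a KV transfer config is actually present.
    if cfg.kv_transfer_config is None:
        return False
    return cfg.kv_transfer_config.is_kv_producer


assert detect_kv_producer(VllmConfigSketch()) is False
assert detect_kv_producer(
    VllmConfigSketch(KVTransferConfigSketch(kv_role="kv_producer"))) is True
```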
@@ -1521,6 +1526,10 @@ def _dummy_run(
     ) -> torch.Tensor:
         if self.torchair_graph_enabled and not with_prefill:
             num_tokens = self.select_torchair_padded_batch_size(num_tokens)
+
+        # For the kv producer, with_prefill is always True
+        if self.is_kv_producer:
+            with_prefill = True
         # Padding for DP
         (num_tokens, num_tokens_across_dp, with_prefill,
          enable_dbo) = self._get_forward_metadata_across_dp(
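
In disaggregated prefill the producer node only serves prefill requests, so its warm-up (dummy) run should exercise the prefill path rather than the decode-only path, keeping padding and cross-DP metadata decisions consistent with its real workload. A minimal sketch of the resulting control flow, with a hypothetical stand-in for the padded-batch-size selection (not the real `_dummy_run`):

```python
# Sketch of the decision made at the top of _dummy_run; the helper behavior
# and candidate batch sizes are illustrative, not the real implementation.
def resolve_dummy_run_mode(is_kv_producer: bool,
                           torchair_graph_enabled: bool,
                           with_prefill: bool,
                           num_tokens: int,
                           padded_sizes=(4, 8, 16, 32)) -> tuple:
    # Decode-only dummy runs are padded up to a captured graph batch size
    # (padded_sizes stands in for select_torchair_padded_batch_size).
    if torchair_graph_enabled and not with_prefill:
        num_tokens = next(s for s in padded_sizes if s >= num_tokens)
    # A KV producer only serves prefill in disaggregated prefill, so its
    # dummy run is always forced onto the prefill path.
    if is_kv_producer:
        with_prefill = True
    return num_tokens, with_prefill


# On a producer, even a "decode-style" warm-up request ends up with
# with_prefill=True before the DP metadata sync.
assert resolve_dummy_run_mode(True, True, False, 5) == (8, True)
```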
