Skip to content

Commit c57611c

Browse files
Merge pull request #103 from raindaywhu/dev_whq_eplb1
Fix bug when running benchmark by moving forward_before behind return o…
2 parents 96fe998 + 162d106 commit c57611c

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

vllm_ascend/worker/model_runner_v1.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1231,15 +1231,16 @@ def execute_model(
12311231
intermediate_tensors: Optional[IntermediateTensors] = None,
12321232
) -> Union[ModelRunnerOutput, torch.Tensor]:
12331233

1234-
if self.dynamic_eplb:
1235-
self.eplb_updator.forward_before()
1236-
12371234
with ProfileExecuteDuration().capture_async(
12381235
"prepare input and forward"):
12391236
self._update_states(scheduler_output)
12401237
if not scheduler_output.total_num_scheduled_tokens:
12411238
# Return empty ModelRunnerOutput if there's no work to do.
12421239
return EMPTY_MODEL_RUNNER_OUTPUT
1240+
1241+
if self.dynamic_eplb:
1242+
self.eplb_updator.forward_before()
1243+
12431244
(attn_metadata, hidden_states, spec_decode_metadata, positions,
12441245
num_scheduled_tokens,
12451246
sample_indices) = (self._process_reqs(scheduler_output,

0 commit comments

Comments
 (0)