
Commit 0350831

fix xpu offline demo garbled output (#2763)
1 parent fee544e commit 0350831

File tree: 2 files changed (+13, -10)


fastdeploy/worker/xpu_model_runner.py

Lines changed: 13 additions & 9 deletions
@@ -142,7 +142,8 @@ def xpu_process_output(
 
 
 def xpu_post_process(sampled_token_ids: paddle.Tensor,
-                     model_output: ModelOutputData) -> None:
+                     model_output: ModelOutputData,
+                     skip_save_output: bool) -> None:
     """
 
     """
@@ -185,12 +186,13 @@ def xpu_post_process(sampled_token_ids: paddle.Tensor,
     )
     # 3. Transmit the model's output and stop generation signal via message queue.
     # In the future, we will abandon this approach.
-    save_output(
-        sampled_token_ids,
-        model_output.not_need_stop,
-        model_output.mp_rank,
-        False,  # use_ep
-    )
+    if not skip_save_output:
+        save_output(
+            sampled_token_ids,
+            model_output.not_need_stop,
+            model_output.mp_rank,
+            False,  # use_ep
+        )
 
 
 def step_paddle(share_inputs: Dict[str, paddle.Tensor], block_size: int,
@@ -658,14 +660,15 @@ def _dummy_run(self,
         self._dummy_prefill_inputs(num_tokens, batch_size)
 
         while True:
-            self.execute_model(None)
+            self.execute_model(None, True)
 
             if int((self.share_inputs['seq_lens_this_time'] > 0).sum()) == 0:
                 break
 
     def execute_model(
         self,
         model_forward_batch: Optional[List[Request]] = None,
+        is_dummy_run: bool = False,
    ) -> Optional[ModelRunnerOutput]:
        """
        The Entrance of model execute.
@@ -721,7 +724,8 @@ class at the server level, which is too granular for ModelRunner.
             accept_num=None,
         )
         xpu_post_process(sampled_token_ids=sampled_token_ids,
-                         model_output=model_output_data)
+                         model_output=model_output_data,
+                         skip_save_output=is_dummy_run)
 
         # 7. Updata 'infer_seed' and step_paddle()
         self.share_inputs["infer_seed"].add_(self.infer_seed_increment)
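Why this fixes the garbled output: _dummy_run warms the runner up by looping over execute_model with fake inputs, and execute_model ends every step with xpu_post_process, which previously called save_output unconditionally. The warm-up steps therefore pushed their sampled tokens onto the same message queue the offline demo consumes, which matches the garbled-output symptom in the commit title. The new is_dummy_run flag flows through as skip_save_output and gates the queue write. Below is a runnable sketch of that gate; a queue.Queue stands in for FastDeploy's real inter-process message queue and plain dicts stand in for ModelOutputData, so only the function names and the gating logic come from the diff.

from queue import Queue

output_queue: Queue = Queue()  # stand-in for the real inter-process message queue

def save_output(sampled_token_ids, not_need_stop, mp_rank, use_ep):
    # Stand-in for FastDeploy's save_output: publish one step's tokens.
    output_queue.put((mp_rank, sampled_token_ids, not_need_stop, use_ep))

def xpu_post_process(sampled_token_ids, model_output, skip_save_output):
    # ...stop-flag and share_inputs bookkeeping elided...
    if not skip_save_output:  # the gate this commit adds
        save_output(sampled_token_ids, model_output["not_need_stop"],
                    model_output["mp_rank"], False)  # use_ep

# A warm-up (dummy-run) step publishes nothing; a real decode step publishes one entry.
xpu_post_process([101, 102], {"not_need_stop": True, "mp_rank": 0}, skip_save_output=True)
xpu_post_process([103, 104], {"not_need_stop": True, "mp_rank": 0}, skip_save_output=False)
assert output_queue.qsize() == 1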

fastdeploy/worker/xpu_worker.py

Lines changed: 0 additions & 1 deletion
@@ -86,7 +86,6 @@ def determine_available_memory(self) -> int:
         You may limit the usage of GPU memory
         by adjusting the `gpu_memory_utilization` parameter.
         """
-        # logger.warn("XPU current could not determine available memory")
         from fastdeploy.model_executor.ops.xpu import \
             xpu_get_free_global_memory, xpu_get_total_global_memory, xpu_get_used_global_memory
 
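The surviving context shows determine_available_memory probing the device through three XPU ops; the diff itself only removes a stale commented-out log line and does not show the arithmetic. The following is therefore a hypothetical sketch only: it assumes each op takes a device id and returns a byte count, and combines them with the gpu_memory_utilization cap mentioned in the docstring.

from fastdeploy.model_executor.ops.xpu import \
    xpu_get_free_global_memory, xpu_get_total_global_memory, xpu_get_used_global_memory

def available_kv_cache_memory(dev_id: int, gpu_memory_utilization: float) -> int:
    # Assumed signatures: each op takes a device id and returns bytes.
    total = xpu_get_total_global_memory(dev_id)
    used = xpu_get_used_global_memory(dev_id)
    free = xpu_get_free_global_memory(dev_id)
    # Budget = utilization cap on total memory, minus what is already
    # allocated (weights, activations); never report more than is free.
    return min(int(total * gpu_memory_utilization) - used, free)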
