Skip to content

Commit a0293e9

Browse files
committed
update code
1 parent 27a001c commit a0293e9

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

fastdeploy/worker/gpu_model_runner.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1023,6 +1023,7 @@ class at the server level, which is too granular for ModelRunner.
10231023
if self.parallel_config.tensor_parallel_degree > 1:
10241024
paddle.distributed.broadcast(sampler_output.sampled_token_ids, 0)
10251025

1026+
self.sampler.post_process(sampled_token_ids, skip_idx_list)
10261027
else:
10271028
self.sampler(logits, self.sampling_metadata,
10281029
self.parallel_config.max_model_len, self.share_inputs)
@@ -1092,7 +1093,6 @@ class at the server level, which is too granular for ModelRunner.
10921093
self.speculative_config,
10931094
self.parallel_config.enable_prefix_caching,
10941095
)
1095-
10961096
self._update_chunked_prefill(model_forward_batch)
10971097
self._add_cache(model_forward_batch)
10981098
return None

0 commit comments

Comments
 (0)