Skip to content

Commit e766c60

Browse files
committed
update code
1 parent d0a9be3 commit e766c60

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

fastdeploy/worker/gpu_model_runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1028,6 +1028,7 @@ class at the server level, which is too granular for ModelRunner.
10281028
if self.parallel_config.tensor_parallel_degree > 1:
10291029
paddle.distributed.broadcast(sampled_token_ids, 0)
10301030

1031+
self.sampler.post_process(sampled_token_ids, skip_idx_list)
10311032
else:
10321033
self.sampler(logits, self.sampling_metadata,
10331034
self.parallel_config.max_model_len, self.share_inputs)
@@ -1097,7 +1098,6 @@ class at the server level, which is too granular for ModelRunner.
10971098
self.speculative_config,
10981099
self.parallel_config.enable_prefix_caching,
10991100
)
1100-
11011101
self._update_chunked_prefill(model_forward_batch)
11021102
self._add_cache(model_forward_batch)
11031103
return None

0 commit comments

Comments
 (0)