Commit d48c034 (1 parent: e9e8443)

Feature/logprob bug fix (#2817)

* fix: handle missing logprobs at step 0 and incorrect finish reason with max_completion_tokens
* Prevent response_logprobs.logprob_token_ids[0] from going out of bounds

File tree: 1 file changed (+23, -10)

fastdeploy/entrypoints/openai/serving_chat.py: 23 additions & 10 deletions
@@ -212,7 +212,8 @@ async def chat_completion_stream_generator(
                         sampled_token_ranks=raw_top_logprobs[2],
                     )
                     logprobs_res = self.build_logprobs_response(
-                        logprobs=top_logprobs,
+                        request_logprobs=request.logprobs,
+                        response_logprobs=top_logprobs,
                         request_top_logprobs=request.top_logprobs,
                     )
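For context: after this change the method receives the client's boolean logprobs flag and the engine's per-step logprob payload as separate arguments, instead of overloading one logprobs name for both. A minimal sketch of an OpenAI-style request that exercises this path, assuming an OpenAI-compatible server on localhost (the base URL and model name are placeholders, not taken from this commit):

    # Hypothetical client call; logprobs, top_logprobs and
    # max_completion_tokens are standard OpenAI chat parameters.
    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
    resp = client.chat.completions.create(
        model="my-model",                # placeholder model name
        messages=[{"role": "user", "content": "Hi"}],
        logprobs=True,                   # becomes request_logprobs
        top_logprobs=5,                  # becomes request_top_logprobs
        max_completion_tokens=16,        # now considered for finish_reason
    )
    print(resp.choices[0].logprobs)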
@@ -229,7 +230,9 @@ async def chat_completion_stream_generator(
             if res["finished"]:
                 num_choices -= 1
                 work_process_metrics.e2e_request_latency.observe(time.time() - res["metrics"]["request_start_time"])
-                if request.max_tokens is None or previous_num_tokens != request.max_tokens:
+                has_no_token_limit = request.max_tokens is None and request.max_completion_tokens is None
+                max_tokens = request.max_completion_tokens or request.max_tokens
+                if has_no_token_limit or previous_num_tokens != max_tokens:
                     choice.finish_reason = "stop"
                     if self.engine_client.reasoning_parser == "ernie_x1" and \
                         output.get("finish_reason", "") == "tool_calls":
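Previously the stream path compared previous_num_tokens only against request.max_tokens, so a generation capped by max_completion_tokens was still labeled "stop". A standalone sketch of the patched decision, assuming (as the hunk suggests but does not show) that the non-"stop" branch reports "length"; the function name is mine, not the repo's:

    from typing import Optional

    def resolve_finish_reason(previous_num_tokens: int,
                              max_tokens: Optional[int],
                              max_completion_tokens: Optional[int]) -> str:
        # Mirror of the patched logic: max_completion_tokens takes
        # precedence over the legacy max_tokens when both are set.
        has_no_token_limit = max_tokens is None and max_completion_tokens is None
        limit = max_completion_tokens or max_tokens
        if has_no_token_limit or previous_num_tokens != limit:
            return "stop"    # ended naturally (e.g. EOS token)
        return "length"      # hit the configured token cap

    assert resolve_finish_reason(7, None, None) == "stop"
    assert resolve_finish_reason(16, 16, None) == "length"
    assert resolve_finish_reason(16, 32, 16) == "length"  # completion cap wins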
@@ -337,7 +340,8 @@ async def chat_completion_full_generator(
                 sampled_token_ranks=raw_top_logprobs[2],
             )
             logprobs_res = self.build_logprobs_response(
-                logprobs=top_logprobs,
+                request_logprobs=request.logprobs,
+                response_logprobs=top_logprobs,
                 request_top_logprobs=request.top_logprobs,
             )
             if logprobs_res and logprobs_res.content is not None:
@@ -369,7 +373,9 @@ async def chat_completion_full_generator(
                 logprobs=logprobs_full_res,
                 finish_reason=None
             )
-            if request.max_tokens is None or previous_num_tokens != request.max_tokens:
+            has_no_token_limit = request.max_tokens is None and request.max_completion_tokens is None
+            max_tokens = request.max_completion_tokens or request.max_tokens
+            if has_no_token_limit or previous_num_tokens != max_tokens:
                 choice.finish_reason = "stop"
                 if self.engine_client.reasoning_parser == "ernie_x1" and \
                     output.get("finish_reason", "") == "tool_calls":
@@ -400,7 +406,8 @@ async def chat_completion_full_generator(

     def build_logprobs_response(
         self,
-        logprobs: Optional[LogprobsLists],
+        request_logprobs: bool,
+        response_logprobs: Optional[LogprobsLists],
         request_top_logprobs: int,
     ) -> Optional[LogProbs]:
         """
@@ -410,17 +417,23 @@ def build_logprobs_response(

         # Parameter validation
         if (
-            logprobs is None
+            response_logprobs is None
+            or not request_logprobs
             or request_top_logprobs is None
-            or request_top_logprobs <= 0
-            or len(logprobs.logprob_token_ids) == 0
+            or request_top_logprobs < 0
         ):
             return None

         try:
             # The top-k candidates for the current token
-            topk_token_ids = logprobs.logprob_token_ids[0][:request_top_logprobs + 1]
-            topk_logprobs = logprobs.logprobs[0][:request_top_logprobs + 1]
+            topk_token_ids = []
+            topk_logprobs = []
+
+            if response_logprobs.logprob_token_ids and len(response_logprobs.logprob_token_ids) > 0:
+                topk_token_ids = response_logprobs.logprob_token_ids[0][:request_top_logprobs + 1]
+
+            if response_logprobs.logprobs and len(response_logprobs.logprobs) > 0:
+                topk_logprobs = response_logprobs.logprobs[0][:request_top_logprobs + 1]

             # Construct the candidate token structure (LogProbEntry) of topk
             top_logprob_entries: List[LogProbEntry] = []
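The old one-liners indexed logprob_token_ids[0] unconditionally, which raises IndexError at step 0 when the engine has not recorded any positions yet. A minimal standalone sketch of the guarded extraction (helper name and plain-list types are mine; the real code works on a LogprobsLists object):

    from typing import List, Tuple

    def safe_topk(logprob_token_ids: List[List[int]],
                  logprobs: List[List[float]],
                  top_logprobs: int) -> Tuple[List[int], List[float]]:
        # Only index position 0 when a first step has actually been recorded.
        topk_token_ids: List[int] = []
        topk_logprobs: List[float] = []
        if logprob_token_ids and len(logprob_token_ids) > 0:
            topk_token_ids = logprob_token_ids[0][:top_logprobs + 1]
        if logprobs and len(logprobs) > 0:
            topk_logprobs = logprobs[0][:top_logprobs + 1]
        return topk_token_ids, topk_logprobs

    # Step 0 with nothing recorded yet: empty results instead of IndexError.
    assert safe_topk([], [], 5) == ([], [])
    assert safe_topk([[1, 2, 3]], [[-0.1, -0.9, -2.3]], 1) == ([1, 2], [-0.1, -0.9])

The + 1 in the slice appears to keep the sampled token itself ahead of the requested top-k alternatives, which would also explain why the validation now accepts request_top_logprobs == 0 (the <= 0 check became < 0).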
