Skip to content

Commit e864415

Browse files
committed
openai/serving_chat: log all tool-call arguments in streaming deltas
Previously, only the first tool call’s arguments were captured when logging streaming delta content, which could miss information if multiple tool calls were present in a single delta. The extraction logic now concatenates the arguments from *all* tool calls, ensuring complete logging. Additional changes: * Updated unit tests to remain within Ruff line-length limits (E501). * Auto-formatted touched files via project pre-commit hooks. Signed-off-by: Adrian Garcia <adrian.garcia@inceptionai.ai>
1 parent 6185d66 commit e864415

File tree

2 files changed

+16
-15
lines changed

2 files changed

+16
-15
lines changed

tests/test_logger.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ def test_request_logger_log_outputs_integration():
472472
# Check input call: logger.info(format_string, request_id, prompt, params, ...)
473473
assert "Received request %s" in input_call[0]
474474
assert input_call[1] == "test-integration"
475-
475+
476476
# Check output call: logger.info(format_string, request_id, stream_info, outputs, ...)
477477
assert "Generated response %s%s" in output_call[0]
478478
assert output_call[1] == "test-integration"
@@ -498,13 +498,13 @@ def test_streaming_complete_logs_full_text_content():
498498

499499
mock_logger.info.assert_called_once()
500500
call_args = mock_logger.info.call_args.args
501-
501+
502502
# Verify the logged output is the full text, not a token count format
503503
logged_output = call_args[3]
504504
assert logged_output == full_response
505505
assert "tokens>" not in logged_output # Ensure it's not the old token count format
506506
assert "streaming_complete" not in logged_output # Ensure it's not the fallback format
507-
507+
508508
# Verify other parameters
509509
assert call_args[1] == "test-streaming-full-text"
510510
assert call_args[2] == " (streaming complete)"

vllm/entrypoints/openai/serving_chat.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -493,7 +493,7 @@ async def chat_completion_stream_generator(
493493

494494
# Always track previous_texts for comprehensive output logging
495495
previous_texts = [""] * num_choices
496-
496+
497497
# Only one of these will be used, thus previous_texts and
498498
# all_previous_token_ids will not be used twice in the same iteration.
499499
if tool_choice_auto or self.reasoning_parser:
@@ -868,12 +868,11 @@ async def chat_completion_stream_generator(
868868
delta_content = ""
869869
if delta_message.content:
870870
delta_content = delta_message.content
871-
elif (delta_message.tool_calls
872-
and delta_message.tool_calls[0].function and
873-
delta_message.tool_calls[0].function.arguments):
874-
func_args = delta_message.tool_calls[
875-
0].function.arguments
876-
delta_content = func_args
871+
elif delta_message.tool_calls:
872+
delta_content = "".join(
873+
tc.function.arguments
874+
for tc in delta_message.tool_calls
875+
if tc.function and tc.function.arguments)
877876

878877
if delta_content:
879878
self.request_logger.log_outputs(
@@ -1021,14 +1020,16 @@ async def chat_completion_stream_generator(
10211020
if self.enable_log_outputs and self.request_logger:
10221021
# Log the complete response for each choice
10231022
for i in range(num_choices):
1024-
full_text = (previous_texts[i] if previous_texts
1025-
and i < len(previous_texts) else
1026-
f"<streaming_complete: {previous_num_tokens[i]} tokens>"
1027-
)
1023+
full_text = (
1024+
previous_texts[i]
1025+
if previous_texts and i < len(previous_texts) else
1026+
f"<streaming_complete: {previous_num_tokens[i]} tokens>"
1027+
)
10281028
self.request_logger.log_outputs(
10291029
request_id=request_id,
10301030
outputs=full_text,
1031-
output_token_ids=None, # Consider also logging all token IDs
1031+
output_token_ids=
1032+
None, # Consider also logging all token IDs
10321033
finish_reason="streaming_complete",
10331034
is_streaming=True,
10341035
delta=False,

0 commit comments

Comments
 (0)