Skip to content

Commit e864415

Browse files
committed
openai/serving_chat: log all tool-call arguments in streaming deltas
Previously, only the first tool call’s arguments were captured when logging streaming delta content, which could miss information if multiple tool calls were present in a single delta. The extraction logic now concatenates the arguments from *all* tool calls, ensuring complete logging. Additional changes: * Updated unit tests to remain within Ruff line-length limits (E501). * Auto-formatted touched files via project pre-commit hooks. Signed-off-by: Adrian Garcia <adrian.garcia@inceptionai.ai>
1 parent 6185d66 commit e864415

File tree

2 files changed

+16
-15
lines changed

2 files changed

+16
-15
lines changed

tests/test_logger.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ def test_request_logger_log_outputs_integration():
472472
# Check input call: logger.info(format_string, request_id, prompt, params, ...)
473473
assert "Received request %s" in input_call[0]
474474
assert input_call[1] == "test-integration"
475-
475+
476476
# Check output call: logger.info(format_string, request_id, stream_info, outputs, ...)
477477
assert "Generated response %s%s" in output_call[0]
478478
assert output_call[1] == "test-integration"
@@ -498,13 +498,13 @@ def test_streaming_complete_logs_full_text_content():
498498

499499
mock_logger.info.assert_called_once()
500500
call_args = mock_logger.info.call_args.args
501-
501+
502502
# Verify the logged output is the full text, not a token count format
503503
logged_output = call_args[3]
504504
assert logged_output == full_response
505505
assert "tokens>" not in logged_output # Ensure it's not the old token count format
506506
assert "streaming_complete" not in logged_output # Ensure it's not the fallback format
507-
507+
508508
# Verify other parameters
509509
assert call_args[1] == "test-streaming-full-text"
510510
assert call_args[2] == " (streaming complete)"

vllm/entrypoints/openai/serving_chat.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -493,7 +493,7 @@ async def chat_completion_stream_generator(
493493

494494
# Always track previous_texts for comprehensive output logging
495495
previous_texts = [""] * num_choices
496-
496+
497497
# Only one of these will be used, thus previous_texts and
498498
# all_previous_token_ids will not be used twice in the same iteration.
499499
if tool_choice_auto or self.reasoning_parser:
@@ -868,12 +868,11 @@ async def chat_completion_stream_generator(
868868
delta_content = ""
869869
if delta_message.content:
870870
delta_content = delta_message.content
871-
elif (delta_message.tool_calls
872-
and delta_message.tool_calls[0].function and
873-
delta_message.tool_calls[0].function.arguments):
874-
func_args = delta_message.tool_calls[
875-
0].function.arguments
876-
delta_content = func_args
871+
elif delta_message.tool_calls:
872+
delta_content = "".join(
873+
tc.function.arguments
874+
for tc in delta_message.tool_calls
875+
if tc.function and tc.function.arguments)
877876

878877
if delta_content:
879878
self.request_logger.log_outputs(
@@ -1021,14 +1020,16 @@ async def chat_completion_stream_generator(
10211020
if self.enable_log_outputs and self.request_logger:
10221021
# Log the complete response for each choice
10231022
for i in range(num_choices):
1024-
full_text = (previous_texts[i] if previous_texts
1025-
and i < len(previous_texts) else
1026-
f"<streaming_complete: {previous_num_tokens[i]} tokens>"
1027-
)
1023+
full_text = (
1024+
previous_texts[i]
1025+
if previous_texts and i < len(previous_texts) else
1026+
f"<streaming_complete: {previous_num_tokens[i]} tokens>"
1027+
)
10281028
self.request_logger.log_outputs(
10291029
request_id=request_id,
10301030
outputs=full_text,
1031-
output_token_ids=None, # Consider also logging all token IDs
1031+
output_token_ids=
1032+
None, # Consider also logging all token IDs
10321033
finish_reason="streaming_complete",
10331034
is_streaming=True,
10341035
delta=False,

0 commit comments

Comments
 (0)