@@ -2665,12 +2665,6 @@ def _process_stream_chunks_with_accumulator(
26652665 stream_completed = False
26662666
26672667 for chunk in stream :
2668- # Update token usage if available
2669- if chunk .usage :
2670- self ._update_token_usage_tracker (
2671- step_token_usage , safe_model_dump (chunk .usage )
2672- )
2673-
26742668 # Process chunk delta
26752669 if chunk .choices and len (chunk .choices ) > 0 :
26762670 choice = chunk .choices [0 ]
@@ -2741,7 +2735,49 @@ def _process_stream_chunks_with_accumulator(
27412735 )
27422736
27432737 self .record_message (final_message )
2744- break
2738+ elif chunk .usage and not chunk .choices :
2739+ # Handle final chunk with usage but empty choices
2740+ # This happens when stream_options={"include_usage": True}
2741+ # Update the final usage from this chunk
2742+ self ._update_token_usage_tracker (
2743+ step_token_usage , safe_model_dump (chunk .usage )
2744+ )
2745+
2746+ # Create final response with final usage
2747+ final_content = content_accumulator .get_full_content ()
2748+ if final_content .strip ():
2749+ final_message = BaseMessage (
2750+ role_name = self .role_name ,
2751+ role_type = self .role_type ,
2752+ meta_dict = {},
2753+ content = final_content ,
2754+ )
2755+
2756+ if response_format :
2757+ self ._try_format_message (
2758+ final_message , response_format
2759+ )
2760+
2761+ # Create final response with final usage (not partial)
2762+ final_response = ChatAgentResponse (
2763+ msgs = [final_message ],
2764+ terminated = False ,
2765+ info = {
2766+ "id" : getattr (chunk , 'id' , '' ),
2767+ "usage" : step_token_usage .copy (),
2768+ "finish_reasons" : ["stop" ],
2769+ "num_tokens" : self ._get_token_count (final_content ),
2770+ "tool_calls" : tool_call_records or [],
2771+ "external_tool_requests" : None ,
2772+ "streaming" : False ,
2773+ "partial" : False ,
2774+ },
2775+ )
2776+ yield final_response
2777+ break
2778+ elif stream_completed :
2779+ # If we've already seen finish_reason but no usage chunk, exit
2780+ break
27452781
27462782 return stream_completed , tool_calls_complete
27472783
@@ -3369,18 +3405,13 @@ async def _aprocess_stream_chunks_with_accumulator(
33693405 response_format : Optional [Type [BaseModel ]] = None ,
33703406 ) -> AsyncGenerator [Union [ChatAgentResponse , Tuple [bool , bool ]], None ]:
33713407 r"""Async version of process streaming chunks with
3372- content accumulator."""
3408+ content accumulator.
3409+ """
33733410
33743411 tool_calls_complete = False
33753412 stream_completed = False
33763413
33773414 async for chunk in stream :
3378- # Update token usage if available
3379- if chunk .usage :
3380- self ._update_token_usage_tracker (
3381- step_token_usage , safe_model_dump (chunk .usage )
3382- )
3383-
33843415 # Process chunk delta
33853416 if chunk .choices and len (chunk .choices ) > 0 :
33863417 choice = chunk .choices [0 ]
@@ -3454,7 +3485,49 @@ async def _aprocess_stream_chunks_with_accumulator(
34543485 )
34553486
34563487 self .record_message (final_message )
3457- break
3488+ elif chunk .usage and not chunk .choices :
3489+ # Handle final chunk with usage but empty choices
3490+ # This happens when stream_options={"include_usage": True}
3491+ # Update the final usage from this chunk
3492+ self ._update_token_usage_tracker (
3493+ step_token_usage , safe_model_dump (chunk .usage )
3494+ )
3495+
3496+ # Create final response with final usage
3497+ final_content = content_accumulator .get_full_content ()
3498+ if final_content .strip ():
3499+ final_message = BaseMessage (
3500+ role_name = self .role_name ,
3501+ role_type = self .role_type ,
3502+ meta_dict = {},
3503+ content = final_content ,
3504+ )
3505+
3506+ if response_format :
3507+ self ._try_format_message (
3508+ final_message , response_format
3509+ )
3510+
3511+ # Create final response with final usage (not partial)
3512+ final_response = ChatAgentResponse (
3513+ msgs = [final_message ],
3514+ terminated = False ,
3515+ info = {
3516+ "id" : getattr (chunk , 'id' , '' ),
3517+ "usage" : step_token_usage .copy (),
3518+ "finish_reasons" : ["stop" ],
3519+ "num_tokens" : self ._get_token_count (final_content ),
3520+ "tool_calls" : tool_call_records or [],
3521+ "external_tool_requests" : None ,
3522+ "streaming" : False ,
3523+ "partial" : False ,
3524+ },
3525+ )
3526+ yield final_response
3527+ break
3528+ elif stream_completed :
3529+ # If we've already seen finish_reason but no usage chunk, exit
3530+ break
34583531
34593532 # Yield the final status as a tuple
34603533 yield (stream_completed , tool_calls_complete )