Skip to content

Commit 637773a

Browse files
authored
feat(agents): add configurable accumulated vs delta streaming with memory optimization (#3145)
1 parent ed29baf commit 637773a

File tree

3 files changed

+93
-13
lines changed

3 files changed

+93
-13
lines changed

camel/agents/chat_agent.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ class StreamContentAccumulator:
142142

143143
def __init__(self):
144144
self.base_content = "" # Content before tool calls
145-
self.current_content = "" # Current streaming content
145+
self.current_content = [] # Accumulated streaming fragments
146146
self.tool_status_messages = [] # Accumulated tool status messages
147147

148148
def set_base_content(self, content: str):
@@ -151,7 +151,7 @@ def set_base_content(self, content: str):
151151

152152
def add_streaming_content(self, new_content: str):
153153
r"""Add new streaming content."""
154-
self.current_content += new_content
154+
self.current_content.append(new_content)
155155

156156
def add_tool_status(self, status_message: str):
157157
r"""Add a tool status message."""
@@ -160,16 +160,18 @@ def add_tool_status(self, status_message: str):
160160
def get_full_content(self) -> str:
161161
r"""Get the complete accumulated content."""
162162
tool_messages = "".join(self.tool_status_messages)
163-
return self.base_content + tool_messages + self.current_content
163+
current = "".join(self.current_content)
164+
return self.base_content + tool_messages + current
164165

165166
def get_content_with_new_status(self, status_message: str) -> str:
166167
r"""Get content with a new status message appended."""
167168
tool_messages = "".join([*self.tool_status_messages, status_message])
168-
return self.base_content + tool_messages + self.current_content
169+
current = "".join(self.current_content)
170+
return self.base_content + tool_messages + current
169171

170172
def reset_streaming_content(self):
171173
r"""Reset only the streaming content, keep base and tool status."""
172-
self.current_content = ""
174+
self.current_content = []
173175

174176

175177
class StreamingChatAgentResponse:
@@ -397,6 +399,10 @@ class ChatAgent(BaseAgent):
397399
step_timeout (Optional[float], optional): Timeout in seconds for the
398400
entire step operation. If None, no timeout is applied.
399401
(default: :obj:`None`)
402+
stream_accumulate (bool, optional): When True, partial streaming
403+
updates return accumulated content (current behavior). When False,
404+
partial updates return only the incremental delta. (default:
405+
:obj:`True`)
400406
"""
401407

402408
def __init__(
@@ -440,6 +446,7 @@ def __init__(
440446
retry_attempts: int = 3,
441447
retry_delay: float = 1.0,
442448
step_timeout: Optional[float] = None,
449+
stream_accumulate: bool = True,
443450
) -> None:
444451
if isinstance(model, ModelManager):
445452
self.model_backend = model
@@ -528,6 +535,7 @@ def __init__(
528535
self.retry_attempts = max(1, retry_attempts)
529536
self.retry_delay = max(0.0, retry_delay)
530537
self.step_timeout = step_timeout
538+
self.stream_accumulate = stream_accumulate
531539

532540
def reset(self):
533541
r"""Resets the :obj:`ChatAgent` to its initial state."""
@@ -3668,15 +3676,18 @@ def _create_streaming_response_with_accumulator(
36683676
) -> ChatAgentResponse:
36693677
r"""Create a streaming response using content accumulator."""
36703678

3671-
# Add new content to accumulator and get full content
3679+
# Add new content; only build full content when needed
36723680
accumulator.add_streaming_content(new_content)
3673-
full_content = accumulator.get_full_content()
3681+
if self.stream_accumulate:
3682+
message_content = accumulator.get_full_content()
3683+
else:
3684+
message_content = new_content
36743685

36753686
message = BaseMessage(
36763687
role_name=self.role_name,
36773688
role_type=self.role_type,
36783689
meta_dict={},
3679-
content=full_content,
3690+
content=message_content,
36803691
)
36813692

36823693
return ChatAgentResponse(
@@ -3686,7 +3697,7 @@ def _create_streaming_response_with_accumulator(
36863697
"id": response_id,
36873698
"usage": step_token_usage.copy(),
36883699
"finish_reasons": ["streaming"],
3689-
"num_tokens": self._get_token_count(full_content),
3700+
"num_tokens": self._get_token_count(message_content),
36903701
"tool_calls": tool_call_records or [],
36913702
"external_tool_requests": None,
36923703
"streaming": True,
@@ -3773,6 +3784,7 @@ def clone(self, with_memory: bool = False) -> ChatAgent:
37733784
tool_execution_timeout=self.tool_execution_timeout,
37743785
pause_event=self.pause_event,
37753786
prune_tool_calls_from_memory=self.prune_tool_calls_from_memory,
3787+
stream_accumulate=self.stream_accumulate,
37763788
)
37773789

37783790
# Copy memory if requested

examples/agents/chatagent_stream.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,14 @@
1818
# Create a streaming model
1919
streaming_model = ModelFactory.create(
2020
model_platform=ModelPlatformType.DEFAULT,
21-
model_type=ModelType.DEFAULT,
21+
model_type=ModelType.GPT_4O_MINI,
2222
model_config_dict={
2323
"stream": True,
2424
"stream_options": {"include_usage": True},
2525
},
2626
)
2727

28-
agent = ChatAgent(
28+
agent_accumulated = ChatAgent(
2929
system_message="You are a helpful assistant that provides detailed "
3030
"and informative responses.",
3131
model=streaming_model,
@@ -36,10 +36,29 @@
3636
"it impacts the environment."
3737

3838
# Get streaming response
39-
streaming_response = agent.step(user_message)
39+
streaming_response = agent_accumulated.step(user_message)
4040

4141
# Stream the response chunks
4242
for chunk_response in streaming_response:
4343
# Each chunk_response is a ChatAgentResponse with incremental content
4444
chunk_content = chunk_response.msgs[0].content
4545
print(chunk_content, end="", flush=True)
46+
47+
print("\n\n---\nDelta streaming mode (stream_accumulate=False):\n")
48+
49+
# Create an agent that yields delta chunks instead of accumulated content
50+
agent_delta = ChatAgent(
51+
system_message="You are a helpful assistant that provides concise "
52+
"and informative responses.",
53+
model=streaming_model,
54+
stream_accumulate=False, # Only yield the delta part per chunk
55+
)
56+
57+
# Get streaming response (delta chunks)
58+
streaming_response_delta = agent_delta.step(user_message)
59+
60+
# Stream only the delta content per chunk; printing reconstructs the full text
61+
for chunk_response in streaming_response_delta:
62+
delta_content = chunk_response.msgs[0].content
63+
print(delta_content, end="", flush=True)
64+
print()

test/agents/test_chat_agent.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from pydantic import BaseModel, Field
3131

3232
from camel.agents import ChatAgent
33-
from camel.agents.chat_agent import ToolCallingRecord
33+
from camel.agents.chat_agent import StreamContentAccumulator, ToolCallingRecord
3434
from camel.configs import ChatGPTConfig
3535
from camel.generators import SystemMessageGenerator
3636
from camel.memories import MemoryRecord
@@ -743,6 +743,55 @@ def test_chat_agent_stream_output(step_call_count=3):
743743
), f"Error in calling round {i+1}"
744744

745745

746+
@pytest.mark.model_backend
747+
def test_chat_agent_stream_accumulate_mode_accumulated():
748+
"""Verify accumulated streaming behavior (stream_accumulate=True)."""
749+
chunks = ["Hello", " ", "world"]
750+
step_usage = {
751+
"completion_tokens": 0,
752+
"prompt_tokens": 0,
753+
"total_tokens": 0,
754+
}
755+
756+
agent = ChatAgent()
757+
accumulator = StreamContentAccumulator()
758+
outputs = []
759+
for c in chunks:
760+
resp = agent._create_streaming_response_with_accumulator(
761+
accumulator, c, step_usage, "acc", []
762+
)
763+
outputs.append(resp.msg.content)
764+
765+
assert len(outputs) == 3
766+
assert outputs[0] == "Hello"
767+
assert outputs[1] == "Hello "
768+
assert outputs[2] == "Hello world"
769+
assert accumulator.get_full_content() == "Hello world"
770+
771+
772+
@pytest.mark.model_backend
773+
def test_chat_agent_stream_accumulate_mode_delta():
774+
"""Verify delta streaming behavior (stream_accumulate=False)."""
775+
chunks = ["Hello", " ", "world"]
776+
step_usage = {
777+
"completion_tokens": 0,
778+
"prompt_tokens": 0,
779+
"total_tokens": 0,
780+
}
781+
782+
agent = ChatAgent(stream_accumulate=False)
783+
accumulator = StreamContentAccumulator()
784+
outputs = []
785+
for c in chunks:
786+
resp = agent._create_streaming_response_with_accumulator(
787+
accumulator, c, step_usage, "delta", []
788+
)
789+
outputs.append(resp.msg.content)
790+
791+
assert outputs == chunks
792+
assert accumulator.get_full_content() == "Hello world"
793+
794+
746795
@pytest.mark.model_backend
747796
def test_set_output_language():
748797
system_message = BaseMessage(

0 commit comments

Comments (0)