From ebeebd6f2b743d63f7e96dda9fabb69243671e17 Mon Sep 17 00:00:00 2001
From: puzhen <1303385763@qq.com>
Date: Wed, 8 Oct 2025 11:31:59 +0200
Subject: [PATCH 1/4] update

---
 camel/societies/workforce/utils.py     |  18 +++
 camel/societies/workforce/workforce.py | 152 ++++++++++++++++++++++++-
 2 files changed, 166 insertions(+), 4 deletions(-)

diff --git a/camel/societies/workforce/utils.py b/camel/societies/workforce/utils.py
index 388801246e..371c63cd91 100644
--- a/camel/societies/workforce/utils.py
+++ b/camel/societies/workforce/utils.py
@@ -42,6 +42,24 @@ class TaskResult(BaseModel):
     )
 
 
+class QualityEvaluation(BaseModel):
+    r"""Quality evaluation result for a completed task."""
+
+    quality_sufficient: bool = Field(
+        description="Whether the task result meets quality standards."
+    )
+    quality_score: int = Field(
+        description="Quality score from 0 to 100.", ge=0, le=100
+    )
+    issues: List[str] = Field(
+        default_factory=list,
+        description="List of quality issues found in the result.",
+    )
+    improvement_suggestion: Optional[str] = Field(
+        default=None, description="Specific suggestion to improve the result."
+    )
+
+
 class TaskAssignment(BaseModel):
     r"""An individual task assignment within a batch."""
 
diff --git a/camel/societies/workforce/workforce.py b/camel/societies/workforce/workforce.py
index f0b9ffc5cb..665c9b9ee8 100644
--- a/camel/societies/workforce/workforce.py
+++ b/camel/societies/workforce/workforce.py
@@ -57,6 +57,7 @@
 from camel.societies.workforce.task_channel import TaskChannel
 from camel.societies.workforce.utils import (
     FailureContext,
+    QualityEvaluation,
     RecoveryDecision,
     RecoveryStrategy,
     TaskAssignment,
@@ -2643,6 +2644,87 @@ async def _post_ready_tasks(self) -> None:
                 # fine
                 pass
 
+    def _evaluate_task_quality(self, task: Task) -> QualityEvaluation:
+        r"""Evaluate the quality of a completed task using Task Agent.
+
+        Args:
+            task (Task): The completed task to evaluate.
+
+        Returns:
+            QualityEvaluation: The quality evaluation result.
+        """
+        quality_prompt = f'''Evaluate the quality of this task completion:
+
+Original Task: {task.content}
+Task Result: {task.result}
+
+Evaluate based on:
+1. Does the result fully address all requirements in the task?
+2. Is the result complete, accurate, and well-structured?
+3. Are there any missing elements or quality issues?
+
+Provide a quality score (0-100) and specific issues if any.'''
+
+        try:
+            if self.use_structured_output_handler:
+                enhanced_prompt = (
+                    self.structured_handler.generate_structured_prompt(
+                        base_prompt=quality_prompt,
+                        schema=QualityEvaluation,
+                        examples=[
+                            {
+                                "quality_sufficient": True,
+                                "quality_score": 98,
+                                "issues": [],
+                                "improvement_suggestion": None,
+                            }
+                        ],
+                    )
+                )
+
+                self.task_agent.reset()
+                response = self.task_agent.step(enhanced_prompt)
+
+                result = self.structured_handler.parse_structured_response(
+                    response.msg.content if response.msg else "",
+                    schema=QualityEvaluation,
+                    fallback_values={
+                        "quality_sufficient": True,
+                        "quality_score": 80,
+                        "issues": [],
+                        "improvement_suggestion": None,
+                    },
+                )
+
+                if isinstance(result, QualityEvaluation):
+                    return result
+                elif isinstance(result, dict):
+                    return QualityEvaluation(**result)
+                else:
+                    return QualityEvaluation(
+                        quality_sufficient=True,
+                        quality_score=80,
+                        issues=["Failed to parse quality evaluation"],
+                        improvement_suggestion=None,
+                    )
+            else:
+                self.task_agent.reset()
+                response = self.task_agent.step(
+                    quality_prompt, response_format=QualityEvaluation
+                )
+                return response.msg.parsed
+
+        except Exception as e:
+            logger.warning(
+                f"Error during quality evaluation: {e}, returning default"
+            )
+            return QualityEvaluation(
+                quality_sufficient=True,
+                quality_score=75,
+                issues=[f"Quality evaluation error: {e!s}"],
+                improvement_suggestion=None,
+            )
+
     async def _handle_failed_task(self, task: Task) -> bool:
         task.failure_count += 1
 
@@ -3345,11 +3427,73 @@ async def _listen_to_channel(self) -> None:
                         )
                         continue
                 else:
-                    print(
-                        f"{Fore.CYAN}🎯 Task {returned_task.id} completed "
-                        f"successfully.{Fore.RESET}"
-                    )
-                    await self._handle_completed_task(returned_task)
+                    quality_eval = self._evaluate_task_quality(
+                        returned_task
+                    )
+
+                    if not quality_eval.quality_sufficient:
+                        logger.info(
+                            f"Task {returned_task.id} quality check: "
+                            f"score={quality_eval.quality_score}, "
+                            f"issues={quality_eval.issues}"
+                        )
+
+                        if (
+                            returned_task.failure_count < 2
+                            and quality_eval.improvement_suggestion
+                        ):
+                            enhanced_content = (
+                                f"{returned_task.content}\n\n"
+                                f"Quality improvement needed: "
+                                f"{quality_eval.improvement_suggestion}"
+                            )
+                            returned_task.content = enhanced_content
+                            returned_task.state = TaskState.FAILED
+                            returned_task.result = (
+                                f"Quality insufficient (score: "
+                                f"{quality_eval.quality_score}). "
+                                f"Issues: {', '.join(quality_eval.issues)}"
+                            )
+
+                            try:
+                                halt = await self._handle_failed_task(
+                                    returned_task
+                                )
+                                if not halt:
+                                    continue
+                                print(
+                                    f"{Fore.RED}Task {returned_task.id} "
+                                    f"quality check failed after retries. "
+                                    f"Final score: {quality_eval.quality_score}"
+                                    f"{Fore.RESET}"
+                                )
+                                await self._graceful_shutdown(
+                                    returned_task
+                                )
+                                break
+                            except Exception as e:
+                                logger.error(
+                                    f"Error handling quality-failed task "
+                                    f"{returned_task.id}: {e}",
+                                    exc_info=True,
+                                )
+                                continue
+                        else:
+                            print(
+                                f"{Fore.YELLOW} Task {returned_task.id} "
+                                f"completed with quality score: "
+                                f"{quality_eval.quality_score}{Fore.RESET}"
+                            )
+                            await self._handle_completed_task(
+                                returned_task
+                            )
+                    else:
+                        print(
+                            f"{Fore.CYAN} Task {returned_task.id} completed"
+                            f"successfully (quality score: "
+                            f"{quality_eval.quality_score}).{Fore.RESET}"
+                        )
+                        await self._handle_completed_task(returned_task)
                 elif returned_task.state == TaskState.FAILED:
                     try:
                         halt = await self._handle_failed_task(returned_task)

From 91a51146bd4ba3d75235d93e0599369082204a04 Mon Sep 17 00:00:00 2001
From: puzhen <1303385763@qq.com>
Date: Wed, 8 Oct 2025 11:50:25 +0200
Subject: [PATCH 2/4] update

---
 camel/societies/workforce/workforce.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/camel/societies/workforce/workforce.py b/camel/societies/workforce/workforce.py
index 665c9b9ee8..c2479d0910 100644
--- a/camel/societies/workforce/workforce.py
+++ b/camel/societies/workforce/workforce.py
@@ -3462,9 +3462,10 @@ async def _listen_to_channel(self) -> None:
                                 if not halt:
                                     continue
                                 print(
                                     f"{Fore.RED}Task {returned_task.id} "
                                     f"quality check failed after retries. "
-                                    f"Final score: {quality_eval.quality_score}"
+                                    f"Final score: "
+                                    f"{quality_eval.quality_score}"
                                     f"{Fore.RESET}"
                                 )
                                 await self._graceful_shutdown(
@@ -3489,7 +3490,8 @@ async def _listen_to_channel(self) -> None:
                             )
                     else:
                         print(
-                            f"{Fore.CYAN} Task {returned_task.id} completed"
+                            f"{Fore.CYAN}"
+                            f"Task {returned_task.id} completed "
                             f"successfully (quality score: "
                             f"{quality_eval.quality_score}).{Fore.RESET}"
                         )

From 74e745622f86608b4b4ee5daa20b9aff56714462 Mon Sep 17 00:00:00 2001
From: puzhen <1303385763@qq.com>
Date: Wed, 8 Oct 2025 16:54:30 +0200
Subject: [PATCH 3/4] update

---
 camel/societies/workforce/workforce.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/camel/societies/workforce/workforce.py b/camel/societies/workforce/workforce.py
index c2479d0910..aab478d86e 100644
--- a/camel/societies/workforce/workforce.py
+++ b/camel/societies/workforce/workforce.py
@@ -2702,10 +2702,13 @@ def _evaluate_task_quality(self, task: Task) -> QualityEvaluation:
                     return QualityEvaluation(**result)
                 else:
                     return QualityEvaluation(
-                        quality_sufficient=True,
-                        quality_score=80,
-                        issues=["Failed to parse quality evaluation"],
-                        improvement_suggestion=None,
+                        quality_sufficient=False,
+                        quality_score=0,
+                        issues=[
+                            "Failed to parse quality " "evaluation response"
+                        ],
+                        improvement_suggestion="Retry with "
+                        "clearer task requirements",
                     )
             else:
                 self.task_agent.reset()
@@ -2716,13 +2719,15 @@ def _evaluate_task_quality(self, task: Task) -> QualityEvaluation:
 
         except Exception as e:
             logger.warning(
-                f"Error during quality evaluation: {e}, returning default"
+                f"Error during quality evaluation: {e}, assuming insufficient"
             )
             return QualityEvaluation(
-                quality_sufficient=True,
-                quality_score=75,
+                quality_sufficient=False,
+                quality_score=0,
                 issues=[f"Quality evaluation error: {e!s}"],
-                improvement_suggestion=None,
+                improvement_suggestion=(
+                    "Unable to evaluate quality, please retry"
+                ),
             )
 
     async def _handle_failed_task(self, task: Task) -> bool:
From 02d2cb2068dc13eb564bc678b57fde40561d34e0 Mon Sep 17 00:00:00 2001
From: puzhen <1303385763@qq.com>
Date: Thu, 9 Oct 2025 13:34:51 +0100
Subject: [PATCH 4/4] update

---
 camel/models/base_model.py                   | 202 ++++++++++++++++++
 .../hybrid_browser_toolkit_example.py        |  40 +++-
 .../test_context_summarizer_toolkit.py       |   5 +-
 3 files changed, 241 insertions(+), 6 deletions(-)

diff --git a/camel/models/base_model.py b/camel/models/base_model.py
index 023f543a1e..fbee2660cb 100644
--- a/camel/models/base_model.py
+++ b/camel/models/base_model.py
@@ -12,6 +12,8 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 import abc
+import copy
+import json
 import os
 import re
 from abc import ABC, abstractmethod
@@ -122,6 +124,14 @@
         )
         self._log_dir = os.environ.get("CAMEL_LOG_DIR", "camel_logs")
 
+        # Snapshot cleaning configuration
+        self._snapshot_cleanup_trigger = int(
+            os.environ.get("CAMEL_SNAPSHOT_CLEANUP_TRIGGER", "3")
+        )
+        self._snapshot_keep_recent = int(
+            os.environ.get("CAMEL_SNAPSHOT_KEEP_RECENT", "1")
+        )
+
     @property
     @abstractmethod
     def token_counter(self) -> BaseTokenCounter:
@@ -258,6 +268,146 @@ def preprocess_messages(
 
         return formatted_messages
 
+    def _count_snapshots(self, messages: List[OpenAIMessage]) -> int:
+        """Count the number of messages containing snapshot key"""
+        count = 0
+        for msg in messages:
+            if msg.get('role') == 'tool' and msg.get('content'):
+                try:
+                    content_str = msg['content']
+                    if (
+                        isinstance(content_str, str)
+                        and "'snapshot':" in content_str
+                    ):
+                        count += 1
+                        continue
+
+                    # Try to parse as JSON
+                    if isinstance(content_str, str):
+                        try:
+                            content = json.loads(content_str)
+                        except json.JSONDecodeError:
+                            import ast
+
+                            content = ast.literal_eval(content_str)
+                    else:
+                        content = content_str
+
+                    if isinstance(content, dict) and 'snapshot' in content:
+                        count += 1
+                        logger.debug(
+                            f"Found snapshot #{count} in message (parsed)"
+                        )
+                except Exception as e:
+                    logger.debug(f"Error parsing message content: {e}")
+                    pass
+        logger.info(f"Total snapshot count: {count}")
+        return count
+
+    def _clean_old_snapshots(
+        self, messages: List[OpenAIMessage], keep_recent: int = 1
+    ) -> List[OpenAIMessage]:
+        """Clean old snapshots, keeping only the most recent ones"""
+        logger.info(
+            f"Starting snapshot cleanup with keep_recent={keep_recent}"
+        )
+        messages_copy = copy.deepcopy(messages)
+
+        # Find all messages with snapshots
+        snapshot_indices = []
+        for i, msg in enumerate(messages_copy):
+            if msg.get('role') == 'tool' and msg.get('content'):
+                try:
+                    content_str = msg['content']
+
+                    if (
+                        isinstance(content_str, str)
+                        and "'snapshot':" in content_str
+                    ):
+                        snapshot_indices.append(i)
+                        logger.debug(
+                            f"Found snapshot "
+                            f"at message index {i} (string search)"
+                        )
+                        continue
+
+                    # Try to parse as JSON
+                    if isinstance(content_str, str):
+                        try:
+                            content = json.loads(content_str)
+                        except json.JSONDecodeError:
+                            import ast
+
+                            content = ast.literal_eval(content_str)
+                    else:
+                        content = content_str
+
+                    if isinstance(content, dict) and 'snapshot' in content:
+                        snapshot_indices.append(i)
+                        logger.debug(
+                            f"Found snapshot at message index {i} (parsed)"
+                        )
+                except Exception as e:
+                    logger.debug(f"Error checking message {i}: {e}")
+                    pass
+
+        logger.info(f"Found {len(snapshot_indices)} snapshots in messages")
+
+        # Keep only the last 'keep_recent' snapshots
+        if len(snapshot_indices) > keep_recent:
+            indices_to_clean = snapshot_indices[:-keep_recent]
+            logger.info(
+                f"Will clean {len(indices_to_clean)} old snapshots"
+                f", keeping the last {keep_recent}"
+            )
+
+            for idx in indices_to_clean:
+                msg = messages_copy[idx]
+                try:
+                    content_str = msg['content']
+
+                    if isinstance(content_str, str):
+                        # Try to parse the content
+                        try:
+                            content = json.loads(content_str)
+                            is_json = True
+                        except json.JSONDecodeError:
+                            import ast
+
+                            content = ast.literal_eval(content_str)
+                            is_json = False
+
+                        # Replace snapshot
+                        content['snapshot'] = (
+                            'snapshot history has been deleted'
+                        )
+
+                        # Convert back to string in the same format
+                        if is_json:
+                            msg['content'] = json.dumps(content)
+                        else:
+                            # Keep as Python dict string
+                            msg['content'] = str(content)
+
+                        logger.debug(f"Cleaned snapshot at index {idx}")
+                    elif isinstance(msg['content'], dict):
+                        msg['content']['snapshot'] = (
+                            'snapshot history has been deleted'
+                        )
+                        logger.debug(f"Cleaned snapshot at index {idx}")
+                except Exception as e:
+                    logger.error(
+                        f"Failed to clean snapshot at index {idx}: {e}"
+                    )
+                    pass
+        else:
+            logger.info(
+                f"No cleaning needed: only {len(snapshot_indices)} snapshots, "
+                f"keep_recent is {keep_recent}"
+            )
+
+        return messages_copy
+
     def _log_request(self, messages: List[OpenAIMessage]) -> Optional[str]:
         r"""Log the request messages to a JSON file if logging is enabled.
 
@@ -410,6 +560,32 @@ def run(
             `ChatCompletionStreamManager[BaseModel]` in the structured
             stream mode.
         """
+        # Check if we should clean snapshots
+        logger.info(
+            f"Snapshot cleanup config: trigger={self._snapshot_cleanup_trigger}, keep_recent={self._snapshot_keep_recent}"
+        )
+        snapshot_count = self._count_snapshots(messages)
+
+        if snapshot_count > 0:
+            logger.info(
+                f"Checking if {snapshot_count} % {self._snapshot_cleanup_trigger} == 0: {snapshot_count % self._snapshot_cleanup_trigger == 0}"
+            )
+            if snapshot_count % self._snapshot_cleanup_trigger == 0:
+                logger.info(
+                    f"Snapshot count ({snapshot_count}) is multiple of {self._snapshot_cleanup_trigger}, "
+                    f"cleaning old snapshots..."
+                )
+                messages = self._clean_old_snapshots(
+                    messages, keep_recent=self._snapshot_keep_recent
+                )
+                logger.info(
+                    f"Cleaned snapshots, keeping only the {self._snapshot_keep_recent} most recent"
+                )
+            else:
+                logger.info(
+                    f"No cleaning needed. {snapshot_count} is not a multiple of {self._snapshot_cleanup_trigger}"
+                )
+
         # Log the request if logging is enabled
         log_path = self._log_request(messages)
 
@@ -464,6 +640,32 @@ async def arun(
             `AsyncChatCompletionStreamManager[BaseModel]` in the structured
             stream mode.
         """
+        # Check if we should clean snapshots
+        logger.info(
+            f"Snapshot cleanup config: trigger={self._snapshot_cleanup_trigger}, keep_recent={self._snapshot_keep_recent}"
+        )
+        snapshot_count = self._count_snapshots(messages)
+
+        if snapshot_count > 0:
+            logger.info(
+                f"Checking if {snapshot_count} % {self._snapshot_cleanup_trigger} == 0: {snapshot_count % self._snapshot_cleanup_trigger == 0}"
+            )
+            if snapshot_count % self._snapshot_cleanup_trigger == 0:
+                logger.info(
+                    f"Snapshot count ({snapshot_count}) is multiple of {self._snapshot_cleanup_trigger}, "
+                    f"cleaning old snapshots..."
+                )
+                messages = self._clean_old_snapshots(
+                    messages, keep_recent=self._snapshot_keep_recent
+                )
+                logger.info(
+                    f"Cleaned snapshots, keeping only the {self._snapshot_keep_recent} most recent"
+                )
+            else:
+                logger.info(
+                    f"No cleaning needed. {snapshot_count} is not a multiple of {self._snapshot_cleanup_trigger}"
+                )
+
         # Log the request if logging is enabled
         log_path = self._log_request(messages)
 
diff --git a/examples/toolkits/hybrid_browser_toolkit_example.py b/examples/toolkits/hybrid_browser_toolkit_example.py
index 28b7a3d62e..851965a722 100644
--- a/examples/toolkits/hybrid_browser_toolkit_example.py
+++ b/examples/toolkits/hybrid_browser_toolkit_example.py
@@ -13,25 +13,55 @@
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 import asyncio
 import logging
+import sys
 
 from camel.agents import ChatAgent
 from camel.models import ModelFactory
 from camel.toolkits import HybridBrowserToolkit
 from camel.types import ModelPlatformType, ModelType
 
+# Remove all existing handlers
+root = logging.getLogger()
+if root.handlers:
+    for handler in list(root.handlers):
+        root.removeHandler(handler)
+
+# Configure logging with a simple handler
 logging.basicConfig(
     level=logging.DEBUG,
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
     handlers=[
-        logging.StreamHandler(),
+        logging.StreamHandler(sys.stdout),
     ],
+    force=True,
 )
 
-logging.getLogger('camel.agents').setLevel(logging.INFO)
-logging.getLogger('camel.models').setLevel(logging.INFO)
+# Set specific loggers to INFO level to see API context logs
+# Note: get_logger adds 'camel.' prefix, so the actual logger name is 'camel.camel.agents.chat_agent'
+logging.getLogger('camel').setLevel(logging.INFO)
+logging.getLogger('camel.camel').setLevel(logging.INFO)
+logging.getLogger('camel.camel.agents').setLevel(logging.INFO)
+logging.getLogger('camel.camel.agents.chat_agent').setLevel(logging.INFO)
 logging.getLogger('camel.models').setLevel(logging.INFO)
-logging.getLogger('camel.toolkits.hybrid_browser_toolkit').setLevel(
-    logging.DEBUG
+logging.getLogger('camel.toolkits').setLevel(logging.INFO)
+
+# Also try without the double camel prefix in case it's different
+logging.getLogger('camel.agents.chat_agent').setLevel(logging.INFO)
+
+# Ensure the chat_agent logger is properly configured
+chat_agent_logger = logging.getLogger('camel.camel.agents.chat_agent')
+chat_agent_logger.setLevel(logging.INFO)
+chat_agent_logger.propagate = True
+
+print("Logging configured. Chat agent logger level:", chat_agent_logger.level)
+print("Effective level:", chat_agent_logger.getEffectiveLevel())
+print("Logger name:", chat_agent_logger.name)
+print("Has handlers:", bool(chat_agent_logger.handlers))
+print(
+    "Parent has handlers:",
+    bool(chat_agent_logger.parent.handlers)
+    if chat_agent_logger.parent
+    else False,
 )
 
 USER_DATA_DIR = "User_Data"
diff --git a/test/toolkits/test_context_summarizer_toolkit.py b/test/toolkits/test_context_summarizer_toolkit.py
index 775599754b..8b12162b1f 100644
--- a/test/toolkits/test_context_summarizer_toolkit.py
+++ b/test/toolkits/test_context_summarizer_toolkit.py
@@ -244,7 +244,10 @@ def test_conversation_history_removal_and_summary_push(
     backend_role = call_args[0][1]  # Second argument (role)
 
     # Verify the summary message content
-    assert "[Context Summary from Previous Conversation]" in summary_message.content
+    assert (
+        "[Context Summary from Previous Conversation]"
+        in summary_message.content
+    )
     assert summary_text in summary_message.content
     assert backend_role == OpenAIBackendRole.USER