From ebeebd6f2b743d63f7e96dda9fabb69243671e17 Mon Sep 17 00:00:00 2001
From: puzhen <1303385763@qq.com>
Date: Wed, 8 Oct 2025 11:31:59 +0200
Subject: [PATCH 1/4] update

---
 camel/societies/workforce/utils.py     |  18 +++
 camel/societies/workforce/workforce.py | 152 ++++++++++++++++++++++++-
 2 files changed, 166 insertions(+), 4 deletions(-)

diff --git a/camel/societies/workforce/utils.py b/camel/societies/workforce/utils.py
index 388801246e..371c63cd91 100644
--- a/camel/societies/workforce/utils.py
+++ b/camel/societies/workforce/utils.py
@@ -42,6 +42,24 @@ class TaskResult(BaseModel):
     )
 
 
+class QualityEvaluation(BaseModel):
+    r"""Quality evaluation result for a completed task."""
+
+    quality_sufficient: bool = Field(
+        description="Whether the task result meets quality standards."
+    )
+    quality_score: int = Field(
+        description="Quality score from 0 to 100.", ge=0, le=100
+    )
+    issues: List[str] = Field(
+        default_factory=list,
+        description="List of quality issues found in the result.",
+    )
+    improvement_suggestion: Optional[str] = Field(
+        default=None, description="Specific suggestion to improve the result."
+    )
+
+
 class TaskAssignment(BaseModel):
     r"""An individual task assignment within a batch."""
 
diff --git a/camel/societies/workforce/workforce.py b/camel/societies/workforce/workforce.py
index f0b9ffc5cb..665c9b9ee8 100644
--- a/camel/societies/workforce/workforce.py
+++ b/camel/societies/workforce/workforce.py
@@ -57,6 +57,7 @@
 from camel.societies.workforce.task_channel import TaskChannel
 from camel.societies.workforce.utils import (
     FailureContext,
+    QualityEvaluation,
     RecoveryDecision,
     RecoveryStrategy,
     TaskAssignment,
@@ -2643,6 +2644,87 @@ async def _post_ready_tasks(self) -> None:
                 # fine
                 pass
 
+    def _evaluate_task_quality(self, task: Task) -> QualityEvaluation:
+        r"""Evaluate the quality of a completed task using Task Agent.
+
+        Args:
+            task (Task): The completed task to evaluate.
+
+        Returns:
+            QualityEvaluation: The quality evaluation result.
+        """
+        quality_prompt = f'''Evaluate the quality of this task completion:
+
+Original Task: {task.content}
+Task Result: {task.result}
+
+Evaluate based on:
+1. Does the result fully address all requirements in the task?
+2. Is the result complete, accurate, and well-structured?
+3. Are there any missing elements or quality issues?
+
+Provide a quality score (0-100) and specific issues if any.'''
+
+        try:
+            if self.use_structured_output_handler:
+                enhanced_prompt = (
+                    self.structured_handler.generate_structured_prompt(
+                        base_prompt=quality_prompt,
+                        schema=QualityEvaluation,
+                        examples=[
+                            {
+                                "quality_sufficient": True,
+                                "quality_score": 98,
+                                "issues": [],
+                                "improvement_suggestion": None,
+                            }
+                        ],
+                    )
+                )
+
+                self.task_agent.reset()
+                response = self.task_agent.step(enhanced_prompt)
+
+                result = self.structured_handler.parse_structured_response(
+                    response.msg.content if response.msg else "",
+                    schema=QualityEvaluation,
+                    fallback_values={
+                        "quality_sufficient": True,
+                        "quality_score": 80,
+                        "issues": [],
+                        "improvement_suggestion": None,
+                    },
+                )
+
+                if isinstance(result, QualityEvaluation):
+                    return result
+                elif isinstance(result, dict):
+                    return QualityEvaluation(**result)
+                else:
+                    return QualityEvaluation(
+                        quality_sufficient=True,
+                        quality_score=80,
+                        issues=["Failed to parse quality evaluation"],
+                        improvement_suggestion=None,
+                    )
+            else:
+                self.task_agent.reset()
+                response = self.task_agent.step(
+                    quality_prompt, response_format=QualityEvaluation
+                )
+                return response.msg.parsed
+
+        except Exception as e:
+            logger.warning(
+                f"Error during quality evaluation: {e}, returning default"
+            )
+            return QualityEvaluation(
+                quality_sufficient=True,
+                quality_score=75,
+                issues=[f"Quality evaluation error: {e!s}"],
+                improvement_suggestion=None,
+            )
+
     async def _handle_failed_task(self, task: Task) -> bool:
         task.failure_count += 1
 
@@ -3345,11 +3427,73 @@ async def _listen_to_channel(self) -> None:
                         )
                         continue
                 else:
-                    print(
-                        f"{Fore.CYAN}🎯 Task {returned_task.id} completed "
-                        f"successfully.{Fore.RESET}"
-                    )
-                    await self._handle_completed_task(returned_task)
+                    quality_eval = self._evaluate_task_quality(
+                        returned_task
+                    )
+
+                    if not quality_eval.quality_sufficient:
+                        logger.info(
+                            f"Task {returned_task.id} quality check: "
+                            f"score={quality_eval.quality_score}, "
+                            f"issues={quality_eval.issues}"
+                        )
+
+                        if (
+                            returned_task.failure_count < 2
+                            and quality_eval.improvement_suggestion
+                        ):
+                            enhanced_content = (
+                                f"{returned_task.content}\n\n"
+                                f"Quality improvement needed: "
+                                f"{quality_eval.improvement_suggestion}"
+                            )
+                            returned_task.content = enhanced_content
+                            returned_task.state = TaskState.FAILED
+                            returned_task.result = (
+                                f"Quality insufficient (score: "
+                                f"{quality_eval.quality_score}). "
+                                f"Issues: {', '.join(quality_eval.issues)}"
+                            )
+
+                            try:
+                                halt = await self._handle_failed_task(
+                                    returned_task
+                                )
+                                if not halt:
+                                    continue
+                                print(
+                                    f"{Fore.RED}Task {returned_task.id} "
+                                    f"quality check failed after retries. "
+                                    f"Final score: {quality_eval.quality_score}"
+                                    f"{Fore.RESET}"
+                                )
+                                await self._graceful_shutdown(
+                                    returned_task
+                                )
+                                break
+                            except Exception as e:
+                                logger.error(
+                                    f"Error handling quality-failed task "
+                                    f"{returned_task.id}: {e}",
+                                    exc_info=True,
+                                )
+                                continue
+                        else:
+                            print(
+                                f"{Fore.YELLOW} Task {returned_task.id} "
+                                f"completed with quality score: "
+                                f"{quality_eval.quality_score}{Fore.RESET}"
+                            )
+                            await self._handle_completed_task(
+                                returned_task
+                            )
+                    else:
+                        print(
+                            f"{Fore.CYAN} Task {returned_task.id} completed"
+                            f"successfully (quality score: "
+                            f"{quality_eval.quality_score}).{Fore.RESET}"
+                        )
+                        await self._handle_completed_task(returned_task)
                 elif returned_task.state == TaskState.FAILED:
                     try:
                         halt = await self._handle_failed_task(returned_task)

From 91a51146bd4ba3d75235d93e0599369082204a04 Mon Sep 17 00:00:00 2001
From: puzhen <1303385763@qq.com>
Date: Wed, 8 Oct 2025 11:50:25 +0200
Subject: [PATCH 2/4] update

---
 camel/societies/workforce/workforce.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/camel/societies/workforce/workforce.py b/camel/societies/workforce/workforce.py
index 665c9b9ee8..c2479d0910 100644
--- a/camel/societies/workforce/workforce.py
+++ b/camel/societies/workforce/workforce.py
@@ -3462,9 +3462,10 @@ async def _listen_to_channel(self) -> None:
                                 if not halt:
                                     continue
                                 print(
                                     f"{Fore.RED}Task {returned_task.id} "
                                     f"quality check failed after retries. "
-                                    f"Final score: {quality_eval.quality_score}"
+                                    f"Final score: "
+                                    f"{quality_eval.quality_score}"
                                     f"{Fore.RESET}"
                                 )
                                 await self._graceful_shutdown(
@@ -3489,7 +3490,8 @@ async def _listen_to_channel(self) -> None:
                             )
                     else:
                         print(
-                            f"{Fore.CYAN} Task {returned_task.id} completed"
+                            f"{Fore.CYAN}"
+                            f"Task {returned_task.id} completed "
                             f"successfully (quality score: "
                             f"{quality_eval.quality_score}).{Fore.RESET}"
                         )

From 74e745622f86608b4b4ee5daa20b9aff56714462 Mon Sep 17 00:00:00 2001
From: puzhen <1303385763@qq.com>
Date: Wed, 8 Oct 2025 16:54:30 +0200
Subject: [PATCH 3/4] update

---
 camel/societies/workforce/workforce.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/camel/societies/workforce/workforce.py b/camel/societies/workforce/workforce.py
index c2479d0910..aab478d86e 100644
--- a/camel/societies/workforce/workforce.py
+++ b/camel/societies/workforce/workforce.py
@@ -2702,10 +2702,13 @@ def _evaluate_task_quality(self, task: Task) -> QualityEvaluation:
                     return QualityEvaluation(**result)
                 else:
                     return QualityEvaluation(
-                        quality_sufficient=True,
-                        quality_score=80,
-                        issues=["Failed to parse quality evaluation"],
-                        improvement_suggestion=None,
+                        quality_sufficient=False,
+                        quality_score=0,
+                        issues=[
+                            "Failed to parse quality " "evaluation response"
+                        ],
+                        improvement_suggestion="Retry with "
+                        "clearer task requirements",
                     )
             else:
                 self.task_agent.reset()
@@ -2716,13 +2719,15 @@ def _evaluate_task_quality(self, task: Task) -> QualityEvaluation:
 
         except Exception as e:
             logger.warning(
-                f"Error during quality evaluation: {e}, returning default"
+                f"Error during quality evaluation: {e}, assuming insufficient"
             )
             return QualityEvaluation(
-                quality_sufficient=True,
-                quality_score=75,
+                quality_sufficient=False,
+                quality_score=0,
                 issues=[f"Quality evaluation error: {e!s}"],
-                improvement_suggestion=None,
+                improvement_suggestion=(
+                    "Unable to evaluate quality, please retry"
+                ),
             )
 
     async def _handle_failed_task(self, task: Task) -> bool:
From 02d2cb2068dc13eb564bc678b57fde40561d34e0 Mon Sep 17 00:00:00 2001
From: puzhen <1303385763@qq.com>
Date: Thu, 9 Oct 2025 13:34:51 +0100
Subject: [PATCH 4/4] update

---
 camel/models/base_model.py                   | 202 ++++++++++++++++++
 .../hybrid_browser_toolkit_example.py        |  40 +++-
 .../test_context_summarizer_toolkit.py       |   5 +-
 3 files changed, 241 insertions(+), 6 deletions(-)

diff --git a/camel/models/base_model.py b/camel/models/base_model.py
index 023f543a1e..fbee2660cb 100644
--- a/camel/models/base_model.py
+++ b/camel/models/base_model.py
@@ -12,6 +12,8 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 import abc
+import copy
+import json
 import os
 import re
 from abc import ABC, abstractmethod
@@ -122,6 +124,14 @@
         )
         self._log_dir = os.environ.get("CAMEL_LOG_DIR", "camel_logs")
 
+        # Snapshot cleaning configuration
+        self._snapshot_cleanup_trigger = int(
+            os.environ.get("CAMEL_SNAPSHOT_CLEANUP_TRIGGER", "3")
+        )
+        self._snapshot_keep_recent = int(
+            os.environ.get("CAMEL_SNAPSHOT_KEEP_RECENT", "1")
+        )
+
     @property
     @abstractmethod
     def token_counter(self) -> BaseTokenCounter:
@@ -258,6 +268,146 @@ def preprocess_messages(
 
         return formatted_messages
 
+    def _count_snapshots(self, messages: List[OpenAIMessage]) -> int:
+        """Count the number of messages containing snapshot key"""
+        count = 0
+        for msg in messages:
+            if msg.get('role') == 'tool' and msg.get('content'):
+                try:
+                    content_str = msg['content']
+                    if (
+                        isinstance(content_str, str)
+                        and "'snapshot':" in content_str
+                    ):
+                        count += 1
+                        continue
+
+                    # Try to parse as JSON
+                    if isinstance(content_str, str):
+                        try:
+                            content = json.loads(content_str)
+                        except json.JSONDecodeError:
+                            import ast
+
+                            content = ast.literal_eval(content_str)
+                    else:
+                        content = content_str
+
+                    if isinstance(content, dict) and 'snapshot' in content:
+                        count += 1
+                        logger.debug(
+                            f"Found snapshot #{count} in message (parsed)"
+                        )
+                except Exception as e:
+                    logger.debug(f"Error parsing message content: {e}")
+                    pass
+        logger.info(f"Total snapshot count: {count}")
+        return count
+
+    def _clean_old_snapshots(
+        self, messages: List[OpenAIMessage], keep_recent: int = 1
+    ) -> List[OpenAIMessage]:
+        """Clean old snapshots, keeping only the most recent ones"""
+        logger.info(
+            f"Starting snapshot cleanup with keep_recent={keep_recent}"
+        )
+        messages_copy = copy.deepcopy(messages)
+
+        # Find all messages with snapshots
+        snapshot_indices = []
+        for i, msg in enumerate(messages_copy):
+            if msg.get('role') == 'tool' and msg.get('content'):
+                try:
+                    content_str = msg['content']
+
+                    if (
+                        isinstance(content_str, str)
+                        and "'snapshot':" in content_str
+                    ):
+                        snapshot_indices.append(i)
+                        logger.debug(
+                            f"Found snapshot "
+                            f"at message index {i} (string search)"
+                        )
+                        continue
+
+                    # Try to parse as JSON
+                    if isinstance(content_str, str):
+                        try:
+                            content = json.loads(content_str)
+                        except json.JSONDecodeError:
+                            import ast
+
+                            content = ast.literal_eval(content_str)
+                    else:
+                        content = content_str
+
+                    if isinstance(content, dict) and 'snapshot' in content:
+                        snapshot_indices.append(i)
+                        logger.debug(
+                            f"Found snapshot at message index {i} (parsed)"
+                        )
+                except Exception as e:
+                    logger.debug(f"Error checking message {i}: {e}")
+                    pass
+
+        logger.info(f"Found {len(snapshot_indices)} snapshots in messages")
+
+        # Keep only the last 'keep_recent' snapshots
+        if len(snapshot_indices) > keep_recent:
+            indices_to_clean = snapshot_indices[:-keep_recent]
+            logger.info(
+                f"Will clean {len(indices_to_clean)} old snapshots"
+                f", keeping the last {keep_recent}"
+            )
+
+            for idx in indices_to_clean:
+                msg = messages_copy[idx]
+                try:
+                    content_str = msg['content']
+
+                    if isinstance(content_str, str):
+                        # Try to parse the content
+                        try:
+                            content = json.loads(content_str)
+                            is_json = True
+                        except json.JSONDecodeError:
+                            import ast
+
+                            content = ast.literal_eval(content_str)
+                            is_json = False
+
+                        # Replace snapshot
+                        content['snapshot'] = (
+                            'snapshot history has been deleted'
+                        )
+
+                        # Convert back to string in the same format
+                        if is_json:
+                            msg['content'] = json.dumps(content)
+                        else:
+                            # Keep as Python dict string
+                            msg['content'] = str(content)
+
+                        logger.debug(f"Cleaned snapshot at index {idx}")
+                    elif isinstance(msg['content'], dict):
+                        msg['content']['snapshot'] = (
+                            'snapshot history has been deleted'
+                        )
+                        logger.debug(f"Cleaned snapshot at index {idx}")
+                except Exception as e:
+                    logger.error(
+                        f"Failed to clean snapshot at index {idx}: {e}"
+                    )
+                    pass
+        else:
+            logger.info(
+                f"No cleaning needed: only {len(snapshot_indices)} snapshots, "
+                f"keep_recent is {keep_recent}"
+            )
+
+        return messages_copy
+
     def _log_request(self, messages: List[OpenAIMessage]) -> Optional[str]:
         r"""Log the request messages to a JSON file if logging is enabled.
 
@@ -410,6 +560,32 @@ def run(
             `ChatCompletionStreamManager[BaseModel]` in the structured
             stream mode.
         """
+        # Check if we should clean snapshots
+        logger.info(
+            f"Snapshot cleanup config: trigger={self._snapshot_cleanup_trigger}, keep_recent={self._snapshot_keep_recent}"
+        )
+        snapshot_count = self._count_snapshots(messages)
+
+        if snapshot_count > 0:
+            logger.info(
+                f"Checking if {snapshot_count} % {self._snapshot_cleanup_trigger} == 0: {snapshot_count % self._snapshot_cleanup_trigger == 0}"
+            )
+            if snapshot_count % self._snapshot_cleanup_trigger == 0:
+                logger.info(
+                    f"Snapshot count ({snapshot_count}) is multiple of {self._snapshot_cleanup_trigger}, "
+                    f"cleaning old snapshots..."
+                )
+                messages = self._clean_old_snapshots(
+                    messages, keep_recent=self._snapshot_keep_recent
+                )
+                logger.info(
+                    f"Cleaned snapshots, keeping only the {self._snapshot_keep_recent} most recent"
+                )
+            else:
+                logger.info(
+                    f"No cleaning needed. {snapshot_count} is not a multiple of {self._snapshot_cleanup_trigger}"
+                )
+
         # Log the request if logging is enabled
         log_path = self._log_request(messages)
 
@@ -464,6 +640,32 @@ async def arun(
             `AsyncChatCompletionStreamManager[BaseModel]` in the structured
             stream mode.
         """
+        # Check if we should clean snapshots
+        logger.info(
+            f"Snapshot cleanup config: trigger={self._snapshot_cleanup_trigger}, keep_recent={self._snapshot_keep_recent}"
+        )
+        snapshot_count = self._count_snapshots(messages)
+
+        if snapshot_count > 0:
+            logger.info(
+                f"Checking if {snapshot_count} % {self._snapshot_cleanup_trigger} == 0: {snapshot_count % self._snapshot_cleanup_trigger == 0}"
+            )
+            if snapshot_count % self._snapshot_cleanup_trigger == 0:
+                logger.info(
+                    f"Snapshot count ({snapshot_count}) is multiple of {self._snapshot_cleanup_trigger}, "
+                    f"cleaning old snapshots..."
+                )
+                messages = self._clean_old_snapshots(
+                    messages, keep_recent=self._snapshot_keep_recent
+                )
+                logger.info(
+                    f"Cleaned snapshots, keeping only the {self._snapshot_keep_recent} most recent"
+                )
+            else:
+                logger.info(
+                    f"No cleaning needed. {snapshot_count} is not a multiple of {self._snapshot_cleanup_trigger}"
+                )
+
         # Log the request if logging is enabled
         log_path = self._log_request(messages)
 
diff --git a/examples/toolkits/hybrid_browser_toolkit_example.py b/examples/toolkits/hybrid_browser_toolkit_example.py
index 28b7a3d62e..851965a722 100644
--- a/examples/toolkits/hybrid_browser_toolkit_example.py
+++ b/examples/toolkits/hybrid_browser_toolkit_example.py
@@ -13,25 +13,55 @@
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 import asyncio
 import logging
+import sys
 
 from camel.agents import ChatAgent
 from camel.models import ModelFactory
 from camel.toolkits import HybridBrowserToolkit
 from camel.types import ModelPlatformType, ModelType
 
+# Remove all existing handlers
+root = logging.getLogger()
+if root.handlers:
+    for handler in list(root.handlers):
+        root.removeHandler(handler)
+
+# Configure logging with a simple handler
 logging.basicConfig(
     level=logging.DEBUG,
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
     handlers=[
-        logging.StreamHandler(),
+        logging.StreamHandler(sys.stdout),
     ],
+    force=True,
 )
 
-logging.getLogger('camel.agents').setLevel(logging.INFO)
-logging.getLogger('camel.models').setLevel(logging.INFO)
+# Set specific loggers to INFO level to see API context logs
+# Note: get_logger adds 'camel.' prefix, so the actual logger name is 'camel.camel.agents.chat_agent'
+logging.getLogger('camel').setLevel(logging.INFO)
+logging.getLogger('camel.camel').setLevel(logging.INFO)
+logging.getLogger('camel.camel.agents').setLevel(logging.INFO)
+logging.getLogger('camel.camel.agents.chat_agent').setLevel(logging.INFO)
 logging.getLogger('camel.models').setLevel(logging.INFO)
-logging.getLogger('camel.toolkits.hybrid_browser_toolkit').setLevel(
-    logging.DEBUG
+logging.getLogger('camel.toolkits').setLevel(logging.INFO)
+
+# Also try without the double camel prefix in case it's different
+logging.getLogger('camel.agents.chat_agent').setLevel(logging.INFO)
+
+# Ensure the chat_agent logger is properly configured
+chat_agent_logger = logging.getLogger('camel.camel.agents.chat_agent')
+chat_agent_logger.setLevel(logging.INFO)
+chat_agent_logger.propagate = True
+
+print("Logging configured. Chat agent logger level:", chat_agent_logger.level)
+print("Effective level:", chat_agent_logger.getEffectiveLevel())
+print("Logger name:", chat_agent_logger.name)
+print("Has handlers:", bool(chat_agent_logger.handlers))
+print(
+    "Parent has handlers:",
+    bool(chat_agent_logger.parent.handlers)
+    if chat_agent_logger.parent
+    else False,
 )
 
 USER_DATA_DIR = "User_Data"
diff --git a/test/toolkits/test_context_summarizer_toolkit.py b/test/toolkits/test_context_summarizer_toolkit.py
index 775599754b..8b12162b1f 100644
--- a/test/toolkits/test_context_summarizer_toolkit.py
+++ b/test/toolkits/test_context_summarizer_toolkit.py
@@ -244,7 +244,10 @@ def test_conversation_history_removal_and_summary_push(
     backend_role = call_args[0][1]  # Second argument (role)
 
     # Verify the summary message content
-    assert "[Context Summary from Previous Conversation]" in summary_message.content
+    assert (
+        "[Context Summary from Previous Conversation]"
+        in summary_message.content
+    )
     assert summary_text in summary_message.content
     assert backend_role == OpenAIBackendRole.USER