From 9008d1a5023e3c0b4099a9f433a329c6b4a3be23 Mon Sep 17 00:00:00 2001
From: human058382928 <162091348+human058382928@users.noreply.github.com>
Date: Fri, 9 May 2025 10:15:51 -0700
Subject: [PATCH 1/5] Add hierarchical agent team workflow for proposal evaluation
---
backend/models.py | 2 +-
examples/proposal_evaluation_example.py | 6 +-
services/workflows/base.py | 6 +
services/workflows/capability_mixins.py | 228 ++
services/workflows/hierarchical_workflows.py | 476 +++
services/workflows/proposal_evaluation.py | 2784 ++++++++++++------
6 files changed, 2523 insertions(+), 979 deletions(-)
create mode 100644 services/workflows/capability_mixins.py
create mode 100644 services/workflows/hierarchical_workflows.py
diff --git a/backend/models.py b/backend/models.py
index 6e188817..bce1d215 100644
--- a/backend/models.py
+++ b/backend/models.py
@@ -336,7 +336,7 @@ class ProposalBase(CustomBaseModel):
end_block: Optional[int] = None
start_block: Optional[int] = None
liquid_tokens: Optional[str] = None # Using string to handle large numbers
- parameters: Optional[str] = None # Hex encoded parameters
+ parameters: Optional[str] = None
# Additional fields from blockchain data
concluded_by: Optional[str] = None
executed: Optional[bool] = None
diff --git a/examples/proposal_evaluation_example.py b/examples/proposal_evaluation_example.py
index 88bebc81..324bd2c9 100644
--- a/examples/proposal_evaluation_example.py
+++ b/examples/proposal_evaluation_example.py
@@ -37,14 +37,14 @@ async def create_test_proposal(dao_id: UUID) -> UUID:
# Create test parameters as a JSON object
parameters = "let this rip https://media1.giphy.com/media/v1.Y2lkPTc5MGI3NjExN3VoZzJzdmV3eGs4M2VrOXBkamg2dTVhb2NhcndwNzVxNHplMzhoaiZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/M7HkIkPrNhSy4/giphy.gif https://mkkhfmcrbwyuutcvtier.supabase.co/storage/v1/object/public/x-vote-media//img_2.jpeg"
- # Convert parameters to JSON string and then hex encode it
- parameters_hex = "0x" + binascii.hexlify(parameters.encode("utf-8")).decode("utf-8")
+ # # Convert parameters to JSON string and then hex encode it
+ # parameters_hex = "0x" + binascii.hexlify(parameters.encode("utf-8")).decode("utf-8")
# Create a test proposal
proposal_data = ProposalCreate(
dao_id=dao_id,
type=ProposalType.ACTION,
- parameters=parameters_hex, # Use hex encoded parameters
+        parameters=parameters,  # Use the raw parameters string (hex encoding no longer applied)
action="send_message",
contract_principal="ST1PQHQKV0RJXZFY1DGX8MNSNYVE3VGZJSRTPGZGM.test-contract",
creator="ST1PQHQKV0RJXZFY1DGX8MNSNYVE3VGZJSRTPGZGM",
diff --git a/services/workflows/base.py b/services/workflows/base.py
index 30a88011..856e00fe 100644
--- a/services/workflows/base.py
+++ b/services/workflows/base.py
@@ -212,7 +212,13 @@ async def execute(self, initial_state: StateType) -> Dict:
# Execute the workflow
self.logger.info(f"Executing workflow {self.__class__.__name__}")
+ self.logger.debug(
+ f"[DEBUG:Workflow:{self.__class__.__name__}] State before ain_invoke: {repr(initial_state)}"
+ )
result = await app.ainvoke(initial_state)
+ self.logger.debug(
+ f"[DEBUG:Workflow:{self.__class__.__name__}] State after ain_invoke: {repr(result)}"
+ )
self.logger.info(f"Workflow {self.__class__.__name__} execution completed")
return result
diff --git a/services/workflows/capability_mixins.py b/services/workflows/capability_mixins.py
new file mode 100644
index 00000000..5fabc243
--- /dev/null
+++ b/services/workflows/capability_mixins.py
@@ -0,0 +1,228 @@
+"""Standardized mixins for adding capabilities to LangGraph workflows.
+
+This module provides a standardized approach to creating and integrating
+capabilities into LangGraph workflows through a mixin system.
+"""
+
+import asyncio
+from abc import ABC, abstractmethod
+from typing import Any, Callable, Dict, List, Optional, TypeVar, Union
+
+from langchain_core.callbacks import BaseCallbackHandler
+from langchain_openai import ChatOpenAI
+from langgraph.graph import StateGraph
+
+from lib.logger import configure_logger
+
+logger = configure_logger(__name__)
+
+# Type variable for workflow states
+StateType = TypeVar("StateType", bound=Dict[str, Any])
+
+
+class CapabilityMixin(ABC):
+ """Abstract base class for workflow capability mixins.
+
+ All capability mixins should inherit from this class and implement
+ the required methods to ensure consistent integration with workflows.
+ """
+
+ @abstractmethod
+ def initialize(self, **kwargs) -> None:
+ """Initialize the capability with necessary configuration.
+
+ Args:
+ **kwargs: Arbitrary keyword arguments for configuration
+ """
+ pass
+
+ @abstractmethod
+ def add_to_graph(self, graph: StateGraph, **kwargs) -> None:
+ """Add this capability's nodes and edges to a StateGraph.
+
+ Args:
+ graph: The StateGraph to add nodes/edges to
+ **kwargs: Additional arguments specific to this capability
+ """
+ pass
+
+
+class BaseCapabilityMixin(CapabilityMixin):
+ """Base implementation of capability mixin with common functionality.
+
+ Provides shared functionality for LLM configuration, state management,
+ and graph integration that most capability mixins can leverage.
+ """
+
+ def __init__(
+ self,
+ config: Optional[Dict[str, Any]] = None,
+ state_key: Optional[str] = None,
+ ):
+ """Initialize the base capability mixin.
+
+ Args:
+ config: Configuration dictionary with settings like model_name, temperature
+ state_key: Key to use when updating the state dictionary
+ """
+ self.config = config or {}
+ self.state_key = state_key
+ self.llm = None
+ self.logger = configure_logger(self.__class__.__name__)
+
+ def initialize(self, **kwargs) -> None:
+ """Initialize the capability with LLM and other settings.
+
+ Args:
+ **kwargs: Additional configuration parameters
+ """
+ # Update config with any passed kwargs
+ if kwargs:
+ self.config.update(kwargs)
+
+ # Create the LLM instance
+ self.llm = ChatOpenAI(
+ model=self.config.get("model_name", "gpt-4.1"),
+ temperature=self.config.get("temperature", 0.1),
+ streaming=self.config.get("streaming", True),
+ callbacks=self.config.get("callbacks", []),
+ )
+
+ if "state_key" in kwargs:
+ self.state_key = kwargs["state_key"]
+
+ self.logger.info(
+ f"Initialized {self.__class__.__name__} with config: {self.config}"
+ )
+
+ def configure(self, state_key: str) -> None:
+ """Configure the state key for this capability.
+
+ Args:
+ state_key: The key to use in the state dictionary
+ """
+ self.state_key = state_key
+
+ @abstractmethod
+ async def process(self, state: StateType) -> Dict[str, Any]:
+ """Process the current state and return updated values.
+
+ Args:
+ state: Current workflow state
+
+ Returns:
+ Dictionary with updated values to be added to the state
+ """
+ pass
+
+ def add_to_graph(self, graph: StateGraph, **kwargs) -> None:
+ """Add this capability as a node to the graph.
+
+ Args:
+ graph: StateGraph to add node to
+ **kwargs: Additional arguments
+ """
+ if not self.state_key:
+ raise ValueError(f"state_key must be set for {self.__class__.__name__}")
+
+ node_name = kwargs.get("node_name", self.state_key)
+
+ async def node_function(state: StateType) -> StateType:
+ """Node function that processes state and updates it.
+
+ Args:
+ state: Current workflow state
+
+ Returns:
+ Updated workflow state
+ """
+ try:
+ result = await self.process(state)
+ # Update state with results
+ if isinstance(result, dict):
+                    # If a dict is returned, store it in the state under this capability's state_key
+ state[self.state_key] = result
+ return state
+ except Exception as e:
+ self.logger.error(f"Error in node {node_name}: {str(e)}", exc_info=True)
+ # Add error to state
+ if "errors" not in state:
+ state["errors"] = []
+ state["errors"].append(
+ {
+ "node": node_name,
+ "error": str(e),
+ "type": self.__class__.__name__,
+ }
+ )
+ return state
+
+ # Add the node to the graph
+ graph.add_node(node_name, node_function)
+ self.logger.info(f"Added node {node_name} to graph")
+
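+
+# Illustrative sketch only: a minimal concrete capability built on
+# BaseCapabilityMixin. The class name, state key, and returned fields are
+# hypothetical and exist purely to show the expected subclassing pattern
+# (implement `process`, then let `add_to_graph` wire the node in).
+class _ExampleEchoCapability(BaseCapabilityMixin):
+    """Toy capability that records which state keys it received."""
+
+    async def process(self, state: StateType) -> Dict[str, Any]:
+        # A real capability would typically call self.llm here.
+        return {"observed_keys": sorted(state.keys())}
+
+
+def _example_wire_capability(graph: StateGraph) -> None:
+    """Sketch of wiring the example capability into an existing StateGraph."""
+    capability = _ExampleEchoCapability(state_key="echo")
+    capability.initialize(model_name="gpt-4.1", temperature=0.0)
+    capability.add_to_graph(graph, node_name="echo")
+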
+
+class ComposableWorkflowMixin(CapabilityMixin):
+ """Mixin for creating composable workflows that can be nested.
+
+ This mixin allows workflows to be composed of sub-workflows and
+ provides utilities for managing their execution and state sharing.
+ """
+
+ def __init__(self, name: str = None):
+ """Initialize the composable workflow mixin.
+
+ Args:
+ name: Name identifier for this composable workflow
+ """
+ self.name = name or self.__class__.__name__
+ self.sub_workflows = {}
+ self.graph = None
+ self.logger = configure_logger(self.__class__.__name__)
+
+ def initialize(self, **kwargs) -> None:
+ """Initialize the composable workflow.
+
+ Args:
+ **kwargs: Configuration parameters
+ """
+ pass
+
+ def add_sub_workflow(
+ self,
+ name: str,
+ workflow: CapabilityMixin,
+ config: Optional[Dict[str, Any]] = None,
+ ) -> None:
+ """Add a sub-workflow to this composable workflow.
+
+ Args:
+ name: Name identifier for the sub-workflow
+ workflow: The workflow object to add
+ config: Configuration for the sub-workflow
+ """
+ if config:
+ # Apply config to the sub-workflow
+ workflow.initialize(**config)
+ self.sub_workflows[name] = workflow
+ self.logger.info(f"Added sub-workflow {name} to {self.name}")
+
+ def build_graph(self) -> StateGraph:
+ """Build and return the composed workflow graph.
+
+ Returns:
+ StateGraph: The compiled workflow graph
+ """
+ raise NotImplementedError("Subclasses must implement build_graph")
+
+ def add_to_graph(self, graph: StateGraph, **kwargs) -> None:
+ """Add this composable workflow to a parent graph.
+
+ For composable workflows, this typically involves adding a
+ subgraph node that represents the entire nested workflow.
+
+ Args:
+ graph: The parent StateGraph
+ **kwargs: Additional arguments
+ """
+ raise NotImplementedError("Subclasses must implement add_to_graph")
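+
+
+# Illustrative sketch only (hypothetical subclass): the minimum a composable
+# workflow must provide is a build_graph() that assembles its registered
+# sub-workflows. This toy version simply chains them in insertion order.
+class _ExampleLinearWorkflow(ComposableWorkflowMixin):
+    """Toy composable workflow that chains its sub-workflows linearly."""
+
+    def build_graph(self) -> StateGraph:
+        graph = StateGraph(Dict[str, Any])
+        previous = None
+        for name, workflow in self.sub_workflows.items():
+            workflow.add_to_graph(graph, node_name=name)
+            if previous is None:
+                graph.set_entry_point(name)
+            else:
+                graph.add_edge(previous, name)
+            previous = name
+        return graph.compile()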
diff --git a/services/workflows/hierarchical_workflows.py b/services/workflows/hierarchical_workflows.py
new file mode 100644
index 00000000..e62f58ef
--- /dev/null
+++ b/services/workflows/hierarchical_workflows.py
@@ -0,0 +1,476 @@
+"""Hierarchical Agent Teams (HAT) workflow implementation.
+
+This module provides the implementation for Hierarchical Agent Teams (HAT)
+workflows where multiple specialized agents work together with a supervisor
+coordinating their activities.
+"""
+
+from typing import (
+ Annotated,
+ Any,
+ Callable,
+ Dict,
+ List,
+ Optional,
+ TypeVar,
+ Union,
+ cast,
+ get_type_hints,
+)
+
+from langchain.prompts import PromptTemplate
+from langchain_openai import ChatOpenAI
+from langgraph.channels.last_value import LastValue
+from langgraph.graph import END, START, StateGraph
+from pydantic import BaseModel, Field
+
+from lib.logger import configure_logger
+from services.workflows.capability_mixins import (
+ BaseCapabilityMixin,
+ ComposableWorkflowMixin,
+ StateType,
+)
+
+
+# Define merge functions for managing parallel state updates
+def append_list_fn(key, values):
+ """Append multiple list updates."""
+ # Handle case where we're dealing with single strings or non-list values
+ result = []
+ for value in values:
+ if isinstance(value, list):
+ result.extend(value)
+ else:
+ result.append(value)
+    return list(set(result))  # Deduplicate merged items (order is not preserved)
+
+
+def merge_dict_fn(key, values):
+ """Merge multiple dictionary updates."""
+ # Handle cases where we might get non-dict values
+ result = {}
+ for value in values:
+ if isinstance(value, dict):
+ result.update(value)
+ elif value is not None:
+ # Try to convert to dict if possible, otherwise use as a key
+ try:
+ result.update(dict(value))
+ except (ValueError, TypeError):
+ result[str(value)] = True
+ return result # Combine dictionaries
+
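+
+def _example_merge_behavior() -> None:
+    """Illustrative sketch (hypothetical values) of the merge functions above."""
+    # Parallel list updates are flattened and deduplicated (order not guaranteed).
+    flags = append_list_fn(
+        "flags", [["missing-evidence"], "low-quality", ["missing-evidence"]]
+    )
+    assert sorted(flags) == ["low-quality", "missing-evidence"]
+    # Parallel dict updates are combined key by key.
+    summaries = merge_dict_fn("summaries", [{"core": "aligned"}, {"social": "neutral"}])
+    assert summaries == {"core": "aligned", "social": "neutral"}
+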
+
+logger = configure_logger(__name__)
+
+
+class SupervisorMixin(BaseCapabilityMixin):
+ """Mixin for implementing supervisor functionality in HAT workflows.
+
+ The supervisor is responsible for routing between agents and
+ making decisions about workflow progression.
+ """
+
+ def __init__(
+ self,
+ config: Optional[Dict[str, Any]] = None,
+ routing_key: str = "next_step",
+ ):
+ """Initialize the supervisor mixin.
+
+ Args:
+ config: Configuration dictionary
+ routing_key: Key in state to use for routing
+ """
+ super().__init__(config=config, state_key=routing_key)
+ self.routing_key = routing_key
+ self.routing_map = {}
+ self.halt_condition = lambda state: False
+ # Default routing function (should be replaced with set_routing_logic)
+ self.routing_func = lambda state: "end"
+
+ def set_routing_logic(self, routing_func: Callable) -> None:
+ """Set the routing function to determine the next step.
+
+ Args:
+ routing_func: Function that takes the state and returns the next step
+ """
+ self.routing_func = routing_func
+
+ def set_halt_condition(self, halt_func: Callable) -> None:
+ """Set a condition that will halt the workflow.
+
+ Args:
+ halt_func: Function that takes the state and returns a boolean
+ """
+ self.halt_condition = halt_func
+
+ def map_step_to_node(self, step_name: str, node_name: str) -> None:
+ """Map a step name to a node name.
+
+ Args:
+ step_name: Name of the step in routing logic
+ node_name: Name of the node in the graph
+ """
+ self.routing_map[step_name] = node_name
+
+ def router(self, state: StateType) -> Union[str, List[str]]:
+ """Route to the next node(s) based on the state.
+
+ Returns either a string node name or a list of node names for parallel execution.
+ """
+ next_step = state[self.routing_key]
+ if next_step == "end" or next_step == END:
+ return END
+ return next_step
+
+ async def process(self, state: StateType) -> Dict[str, Any]:
+ """Process the current state and determine the next step.
+
+ Args:
+ state: Current workflow state
+
+ Returns:
+ Dict with next step information
+ """
+ # Check if halt condition is met
+ if self.halt_condition(state):
+ return {"next_step": END, "reason": "halt_condition_met"}
+
+ # Determine next step using routing logic
+ next_step = self.routing_func(state)
+
+ # Handle special case for END constant
+ if next_step == "end":
+ next_step = END
+
+ # Map to node name if a mapping exists
+ if isinstance(next_step, list):
+ # For parallel execution, map each item in the list
+ mapped_step = [self.routing_map.get(step, step) for step in next_step]
+ else:
+ mapped_step = self.routing_map.get(next_step, next_step)
+
+ return {
+ "next_step": mapped_step,
+ "timestamp": state.get("timestamp", ""),
+ }
+
+ def add_to_graph(self, graph: StateGraph, **kwargs) -> None:
+ """Add the supervisor to the graph.
+
+ Args:
+ graph: StateGraph to add node to
+ **kwargs: Additional arguments
+ """
+ node_name = kwargs.get("node_name", "supervisor")
+
+ async def supervisor_node(state: StateType) -> StateType:
+ result = await self.process(state)
+ next_step = result["next_step"]
+ # Normalize "end" to END constant if needed
+ if next_step == "end":
+ next_step = END
+ state[self.routing_key] = next_step
+ return state
+
+ graph.add_node(node_name, supervisor_node)
+
+ # Define conditional edges from supervisor to other nodes
+ def router(state: StateType) -> Union[str, List[str]]:
+ next_step = state[self.routing_key]
+ # Handle both string and list cases
+ if isinstance(next_step, list):
+ return next_step
+ if next_step == "end" or next_step == END:
+ return END
+ return next_step
+
+ # Create a complete routing map that includes END
+ routing_map_with_end = {
+ **{step: step for step in self.routing_map.values()},
+ "end": END,
+ END: END,
+ }
+
+ # Add explicit entry for every node we might want to route to
+ for node in graph.nodes:
+ if (
+ node not in routing_map_with_end
+ and node != "supervisor"
+ and node != END
+ ):
+ routing_map_with_end[node] = node
+
+ # Add conditional edges with the complete routing map
+ graph.add_conditional_edges(node_name, router, routing_map_with_end)
+
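+
+def _example_configure_supervisor() -> SupervisorMixin:
+    """Illustrative sketch: wiring routing logic into a supervisor.
+
+    The step names, node names, and state keys below are hypothetical; real
+    workflows supply their own routing function and mappings.
+    """
+    supervisor = SupervisorMixin(routing_key="next_step")
+    # Route based on which intermediate results already exist in the state.
+    supervisor.set_routing_logic(
+        lambda state: "end" if state.get("summary") else "summarize"
+    )
+    # Stop early if any node has recorded an error.
+    supervisor.set_halt_condition(lambda state: bool(state.get("errors")))
+    # "summarize" (a routing step) runs on the graph node named "summary_agent".
+    supervisor.map_step_to_node("summarize", "summary_agent")
+    return supervisor
+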
+
+class HierarchicalTeamWorkflow(ComposableWorkflowMixin):
+ """Implementation of a Hierarchical Agent Team workflow.
+
+ This workflow orchestrates a team of specialized agents coordinated
+ by a supervisor to solve complex tasks.
+ """
+
+ def __init__(self, name: str = None, config: Optional[Dict[str, Any]] = None):
+ """Initialize the hierarchical team workflow.
+
+ Args:
+ name: Name identifier for this workflow
+ config: Configuration dictionary
+ """
+ super().__init__(name=name)
+ self.config = config or {}
+ self.supervisor = SupervisorMixin(config=self.config)
+ self.entry_point = None
+
+ def set_entry_point(self, node_name: str) -> None:
+ """Set the entry point for the workflow.
+
+ Args:
+ node_name: Name of the starting node
+ """
+ self.entry_point = node_name
+
+ def set_supervisor_logic(self, routing_func: Callable) -> None:
+ """Set the routing logic for the supervisor.
+
+ Args:
+ routing_func: Function that determines the next step
+ """
+ self.supervisor.set_routing_logic(routing_func)
+
+ def set_halt_condition(self, halt_func: Callable) -> None:
+ """Set a condition that will halt the workflow.
+
+ Args:
+ halt_func: Function that takes the state and returns a boolean
+ """
+ self.supervisor.set_halt_condition(halt_func)
+
+ def add_parallel_execution(
+ self, from_node: str, to_nodes: List[str], merge_node: str
+ ) -> None:
+ """Add parallel execution paths to the workflow.
+
+ Args:
+ from_node: Node where parallel execution begins
+ to_nodes: List of nodes to execute in parallel
+ merge_node: Node where results are merged
+ """
+ self.parallel_executions = {
+ "from_node": from_node,
+ "to_nodes": to_nodes,
+ "merge_node": merge_node,
+ }
+
+ def build_graph(self) -> StateGraph:
+ """Build the hierarchical team workflow graph.
+
+ Returns:
+ StateGraph: The compiled workflow graph
+ """
+ if not self.entry_point:
+ raise ValueError("Entry point must be set before building graph")
+
+ # Create graph with the appropriate state type
+ state_type = self.config.get("state_type", Dict[str, Any])
+
+ # Create graph with minimum configuration
+ graph = StateGraph(state_type)
+
+ # Get recursion limit to prevent infinite loops (will be passed to compile())
+ recursion_limit = self.config.get("recursion_limit", 10)
+ self.logger.info(f"Setting recursion limit to {recursion_limit}")
+
+ # Set up key-specific channels for concurrent updates
+ if hasattr(state_type, "__annotations__"):
+ type_hints = get_type_hints(state_type, include_extras=True)
+ for key, annotation in type_hints.items():
+ # Check if it's an Annotated type with a merge function
+ if hasattr(annotation, "__metadata__") and callable(
+ annotation.__metadata__[-1]
+ ):
+ merge_func = annotation.__metadata__[-1]
+ field_type = annotation.__origin__
+ # Use direct assignment of channels instead of config parameter
+                    if key not in graph.channels and merge_func in (
+                        append_list_fn,
+                        merge_dict_fn,
+                    ):
+                        channel = LastValue(field_type)
+                        channel.reduce = merge_func
+                        graph.channels[key] = channel
+
+ # Add all sub-workflows to the graph
+ for name, workflow in self.sub_workflows.items():
+ try:
+ workflow.add_to_graph(graph, node_name=name)
+ # Map step name to node name in supervisor
+ self.supervisor.map_step_to_node(name, name)
+ self.logger.debug(f"Added sub-workflow node: {name}")
+ except Exception as e:
+ self.logger.error(
+ f"Error adding sub-workflow {name}: {str(e)}", exc_info=True
+ )
+ raise ValueError(f"Failed to add sub-workflow {name}: {str(e)}")
+
+ # Add supervisor to graph
+ try:
+ self.supervisor.add_to_graph(graph)
+ self.logger.debug("Added supervisor node")
+ except Exception as e:
+ self.logger.error(f"Error adding supervisor: {str(e)}", exc_info=True)
+ raise ValueError(f"Failed to add supervisor: {str(e)}")
+
+ # Set entry point
+ graph.set_entry_point(self.entry_point)
+ self.logger.debug(f"Set entry point to {self.entry_point}")
+
+ # Connect entry point to supervisor
+ graph.add_edge(self.entry_point, "supervisor")
+ self.logger.debug(f"Added edge: {self.entry_point} -> supervisor")
+
+ # Add edges from all nodes to supervisor
+ for name in self.sub_workflows.keys():
+ if name != self.entry_point:
+ graph.add_edge(name, "supervisor")
+ self.logger.debug(f"Added edge: {name} -> supervisor")
+
+ # Add parallel execution if configured
+ if hasattr(self, "parallel_executions"):
+ pe = self.parallel_executions
+
+ # Define function for parallel branching
+ def branch_function(state: StateType) -> Dict:
+ """Branch to parallel nodes or return to supervisor based on state.
+
+ This returns both the next nodes and any state updates needed.
+ """
+ # For debugging, log the state keys we care about
+ self.logger.debug(
+ f"Branch function evaluating state: "
+ f"historical_score={state.get('historical_score') is not None}, "
+ f"financial_score={state.get('financial_score') is not None}, "
+ f"social_score={state.get('social_score') is not None}, "
+ f"in_parallel={state.get('in_parallel_execution', False)}"
+ )
+
+ # Check if we're already in parallel execution
+ if state.get("in_parallel_execution", False):
+ # Check if all parallel executions have completed
+ all_completed = True
+ for node_name in pe["to_nodes"]:
+ score_key = f"{node_name.replace('_agent', '')}_score"
+ if state.get(score_key) is None:
+ all_completed = False
+ break
+
+ if all_completed:
+ self.logger.debug(
+ f"All parallel nodes complete, routing to {pe['merge_node']}"
+ )
+ # Return to merge node and clear the in_parallel_execution flag
+ return {
+ "nodes": [pe["merge_node"]],
+ "state_updates": {"in_parallel_execution": False},
+ }
+ else:
+ # Still waiting for some parallel nodes to complete, let supervisor route
+ self.logger.debug(
+ "Some parallel nodes still executing, continuing parallel processing"
+ )
+ # Force parallel execution to stay on
+ return {
+ "nodes": ["supervisor"],
+ "state_updates": {"in_parallel_execution": True},
+ }
+
+ # When historical_score is set but financial_score and social_score are not,
+ # we need to branch to both financial_agent and social_agent in parallel
+ elif state.get("historical_score") is not None and all(
+ state.get(f"{node_name.replace('_agent', '')}_score") is None
+ for node_name in pe["to_nodes"]
+ ):
+ self.logger.debug(
+ f"Starting parallel execution, branching to nodes: {pe['to_nodes']}"
+ )
+ # Set the in_parallel_execution flag to True
+ return {
+ "nodes": pe["to_nodes"],
+ "state_updates": {"in_parallel_execution": True},
+ }
+
+ # Default case, return to supervisor for normal routing
+ # Make sure we're not stuck in a loop
+ self.logger.debug("Not branching, returning to supervisor")
+
+ # We need to ensure that if historical_score exists but financial/social are missing,
+ # we maintain the parallel execution flag (this fixes the looping problem)
+ if state.get("historical_score") is not None and any(
+ state.get(f"{node_name.replace('_agent', '')}_score") is None
+ for node_name in pe["to_nodes"]
+ ):
+ return {
+ "nodes": ["supervisor"],
+ "state_updates": {"in_parallel_execution": True},
+ }
+
+ return {"nodes": ["supervisor"], "state_updates": {}}
+
+ # For each parallel node, map it in the supervisor
+ for node in pe["to_nodes"]:
+ self.supervisor.map_step_to_node(node, node)
+
+ # Add branching from source node
+ # We need to wrap our branch_function to handle state updates
+ def branch_wrapper(state: StateType) -> List[str]:
+ result = branch_function(state)
+ # Apply any state updates
+ for key, value in result.get("state_updates", {}).items():
+ state[key] = value
+ # Return the nodes to route to
+ return result.get("nodes", ["supervisor"])
+
+ # Create a mapping for all possible nodes, including supervisor and END
+ branch_map = {node: node for node in pe["to_nodes"]}
+ branch_map["supervisor"] = "supervisor"
+ branch_map[pe["merge_node"]] = pe["merge_node"]
+ # Explicitly map END constant
+ branch_map[END] = END # Ensure END is correctly mapped
+
+ # Add branching from source node using our wrapper
+ graph.add_conditional_edges(pe["from_node"], branch_wrapper, branch_map)
+ self.logger.debug(
+ f"Added conditional edges for parallel execution from {pe['from_node']}"
+ )
+
+ # Connect merge node to supervisor
+ graph.add_edge(pe["merge_node"], "supervisor")
+ self.logger.debug(f"Added edge: {pe['merge_node']} -> supervisor")
+ else:
+ # Even without explicit parallel execution, we need to make sure
+ # the supervisor can handle returning lists of nodes for parallel execution
+ self.logger.debug(
+ "No parallel execution configured, relying on supervisor for parallel routing"
+ )
+
+ # Compile the graph with the recursion limit configuration
+ compiled_graph = graph.compile(
+ name="HierarchicalTeamWorkflow",
+ checkpointer=None,
+ debug=self.config.get("debug", False),
+ )
+
+ # Pass recursion limit through with_config
+ compiled_graph = compiled_graph.with_config(
+ {"recursion_limit": recursion_limit}
+ )
+
+ self.logger.info("Compiled hierarchical team workflow graph")
+
+ # Return the compiled graph
+ return compiled_graph
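+
+
+async def _example_run_team(
+    research_agent: BaseCapabilityMixin, writing_agent: BaseCapabilityMixin
+) -> Dict[str, Any]:
+    """Illustrative sketch: composing and running a two-agent team.
+
+    The agent instances, node names, and state keys are hypothetical; this
+    assumes each agent writes its result under a state key matching its node
+    name, which callers must arrange when constructing the agents.
+    """
+    team = HierarchicalTeamWorkflow(
+        name="ExampleTeam", config={"recursion_limit": 10}
+    )
+    team.add_sub_workflow("research_agent", research_agent)
+    team.add_sub_workflow("writing_agent", writing_agent)
+    team.set_entry_point("research_agent")
+    # Research first, then write, then finish.
+    team.set_supervisor_logic(
+        lambda state: "end"
+        if state.get("writing_agent")
+        else ("writing_agent" if state.get("research_agent") else "research_agent")
+    )
+    compiled = team.build_graph()
+    return await compiled.ainvoke({"next_step": ""})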
diff --git a/services/workflows/proposal_evaluation.py b/services/workflows/proposal_evaluation.py
index f2ca0849..9ebfd6bb 100644
--- a/services/workflows/proposal_evaluation.py
+++ b/services/workflows/proposal_evaluation.py
@@ -1,11 +1,14 @@
import asyncio
import base64
-from typing import Any, Dict, List, Optional, TypedDict
+import operator
+import uuid
+from typing import Annotated, Any, Dict, List, Optional, TypedDict, Union
import httpx
from langchain.prompts import PromptTemplate
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
+from langgraph.channels import LastValue
from langgraph.graph import END, Graph, StateGraph
from pydantic import BaseModel, Field
@@ -15,6 +18,7 @@
ExtensionFilter,
Profile,
PromptFilter,
+ ProposalBase,
ProposalType,
QueueMessageFilter,
QueueMessageType,
@@ -28,7 +32,13 @@
from services.workflows.base import (
BaseWorkflow,
)
+from services.workflows.capability_mixins import BaseCapabilityMixin
from services.workflows.chat import ChatService, StreamingCallbackHandler
+from services.workflows.hierarchical_workflows import (
+ HierarchicalTeamWorkflow,
+ append_list_fn,
+ merge_dict_fn,
+)
from services.workflows.planning_mixin import PlanningCapability
from services.workflows.vector_mixin import VectorRetrievalCapability
from services.workflows.web_search_mixin import WebSearchCapability
@@ -50,881 +60,1653 @@ class ProposalEvaluationOutput(BaseModel):
reasoning: str = Field(description="The reasoning behind the evaluation decision")
-class EvaluationState(TypedDict):
- """State for the proposal evaluation flow."""
-
- action_proposals_contract: str
- action_proposals_voting_extension: str
- proposal_id: int
- proposal_data: Dict
- dao_info: Optional[Dict]
- approve: bool
- confidence_score: float
- reasoning: str
- vote_result: Optional[Dict]
- wallet_id: Optional[UUID]
- confidence_threshold: float
- auto_vote: bool
- formatted_prompt: str
- agent_prompts: List[Dict]
- vector_results: Optional[List[Dict]]
- recent_tweets: Optional[List[Dict]]
- web_search_results: Optional[List[Dict]]
- treasury_balance: Optional[float]
- contract_source: Optional[str]
- proposal_images: Optional[List[Dict]] # Store encoded images for LLM
- # Token usage tracking per step
- web_search_token_usage: Optional[Dict]
- evaluation_token_usage: Optional[Dict]
- # Model info for cost calculation
- evaluation_model_info: Optional[Dict]
- web_search_model_info: Optional[Dict]
-
-
-class ProposalEvaluationWorkflow(
- BaseWorkflow[EvaluationState],
- VectorRetrievalCapability,
- WebSearchCapability,
- PlanningCapability,
+def no_update_reducer(current: Any, new: List[Any]) -> Any:
+ """Reducer that prevents updates after initial value is set."""
+ # Treat initial empty string for str types as if it were None for accepting the first value
+ is_initial_empty_string = isinstance(current, str) and current == ""
+
+ # If current is genuinely set (not None and not initial empty string), keep it.
+ if current is not None and not is_initial_empty_string:
+ return current
+
+ # Current is None or an initial empty string. Try to set it from new.
+ processed_new_values = (
+ new if isinstance(new, list) else [new]
+ ) # Ensure 'new' is a list
+ for n_val in processed_new_values:
+ if n_val is not None:
+ return n_val
+
+ # If current was None/initial empty string and new is all None or empty, return current (which is None or '')
+ return current
+
+
+def merge_dict_override_fn(key, values):
+ """Merge dictionaries by taking the last non-None value."""
+ # Handle case where values is None
+ if values is None:
+ return None
+
+ # Handle case where values is not iterable
+ if not hasattr(values, "__iter__"):
+ return values
+
+ result = None
+ for value in values:
+ if value is not None:
+ result = value
+ return result
+
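+
+def _example_reducer_behavior() -> None:
+    """Illustrative sketch (hypothetical values) of the reducers defined above."""
+    # First non-None/non-empty value wins; later writes are ignored.
+    assert no_update_reducer("", ["prop-123"]) == "prop-123"
+    assert no_update_reducer("prop-123", ["prop-456"]) == "prop-123"
+    # Last non-None value wins for per-agent score dictionaries.
+    assert merge_dict_override_fn("core_score", [None, {"score": 80}]) == {"score": 80}
+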
+
+class ProposalEvaluationState(TypedDict):
+ """Type definition for the proposal evaluation state."""
+
+ proposal_id: Annotated[str, no_update_reducer] # Read-only during execution
+ proposal_data: Annotated[str, no_update_reducer] # Now a string, not a dict
+ core_score: Annotated[Optional[Dict[str, Any]], merge_dict_override_fn]
+ historical_score: Annotated[Optional[Dict[str, Any]], merge_dict_override_fn]
+ financial_score: Annotated[Optional[Dict[str, Any]], merge_dict_override_fn]
+ social_score: Annotated[Optional[Dict[str, Any]], merge_dict_override_fn]
+ final_score: Annotated[Optional[Dict[str, Any]], merge_dict_override_fn]
+ flags: Annotated[List[str], append_list_fn] # Merges lists of flags
+ summaries: Annotated[
+ Dict[str, str], merge_dict_fn
+ ] # Merges dictionaries of summaries
+ decision: Annotated[Optional[str], merge_dict_override_fn]
+ halt: Annotated[bool, operator.or_] # Use OR for boolean flags
+ token_usage: Annotated[
+ Dict[str, Dict[str, int]], merge_dict_fn
+ ] # Merges nested dictionaries
+ core_agent_invocations: Annotated[int, operator.add] # Counts should add
+ proposal_images: Annotated[
+ Optional[List[Dict]], merge_dict_override_fn
+ ] # ADDED: To store encoded images
+
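+
+def _example_initial_state(proposal_id: str, proposal_text: str) -> ProposalEvaluationState:
+    """Illustrative sketch of a fully populated initial state.
+
+    Field values are hypothetical; the annotated reducers above govern how
+    agents update these keys during graph execution.
+    """
+    return ProposalEvaluationState(
+        proposal_id=proposal_id,
+        proposal_data=proposal_text,
+        core_score=None,
+        historical_score=None,
+        financial_score=None,
+        social_score=None,
+        final_score=None,
+        flags=[],
+        summaries={},
+        decision=None,
+        halt=False,
+        token_usage={},
+        core_agent_invocations=0,
+        proposal_images=[],
+    )
+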
+
+class AgentOutput(BaseModel):
+ """Output model for agent evaluations."""
+
+ score: int = Field(description="Score from 0-100")
+ flags: List[str] = Field(description="Critical issues flagged")
+ summary: str = Field(description="Summary of findings")
+
+
+class FinalOutput(BaseModel):
+ """Output model for the final evaluation decision."""
+
+ score: int = Field(description="Final evaluation score")
+ decision: str = Field(description="Approve or Reject")
+ explanation: str = Field(description="Reasoning for decision")
+
+
+def update_state_with_agent_result(
+ state: ProposalEvaluationState, agent_result: Dict[str, Any], agent_name: str
):
- """Workflow for evaluating DAO proposals and voting automatically."""
-
- def __init__(
- self,
- collection_names: Optional[List[str]] = None,
- model_name: str = "gpt-4.1",
- temperature: Optional[float] = 0.1,
- **kwargs,
+ """Helper function to update state with agent result including summaries and flags."""
+ # Update agent score in state
+ if agent_name in ["core", "historical", "financial", "social", "final"]:
+ state[f"{agent_name}_score"] = agent_result
+
+ # Update summaries
+ if "summaries" not in state:
+ state["summaries"] = {}
+
+ if "summary" in agent_result and agent_result["summary"]:
+ state["summaries"][f"{agent_name}_score"] = agent_result["summary"]
+
+ # Update flags
+ if "flags" not in state:
+ state["flags"] = []
+
+ if "flags" in agent_result and isinstance(agent_result["flags"], list):
+ state["flags"].extend(agent_result["flags"])
+
+ # Update token usage
+ if (
+ "token_usage" in state
+ and isinstance(state["token_usage"], dict)
+ and f"{agent_name}_agent" in state["token_usage"]
):
- """Initialize the workflow.
-
- Args:
- collection_names: Optional list of collection names to search
- model_name: The model to use for evaluation
- temperature: Optional temperature setting for the model
- **kwargs: Additional arguments passed to parent
- """
- # Initialize planning LLM
- planning_llm = ChatOpenAI(
- model="o4-mini",
- stream_usage=True,
- streaming=True,
- )
+ # Token usage has been set by the agent directly
+ pass
+ elif hasattr(agent_result, "get") and agent_result.get("token_usage"):
+ # Token usage available in the result
+ if "token_usage" not in state:
+ state["token_usage"] = {}
+ state["token_usage"][f"{agent_name}_agent"] = agent_result.get("token_usage")
+
+ return state
+
+
+class CoreContextAgent(BaseCapabilityMixin, VectorRetrievalCapability):
+ """Core Context Agent evaluates proposals against DAO mission and standards."""
+
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
+ """Initialize the Core Context Agent."""
+ BaseCapabilityMixin.__init__(self, config=config, state_key="core_score")
+ VectorRetrievalCapability.__init__(self)
+ self.initialize()
+ self._initialize_vector_capability()
+
+ def _initialize_vector_capability(self):
+ """Initialize the vector retrieval functionality."""
+ if not hasattr(self, "retrieve_from_vector_store"):
+ self.retrieve_from_vector_store = (
+ VectorRetrievalCapability.retrieve_from_vector_store.__get__(
+ self, self.__class__
+ )
+ )
+ self.logger.info(
+ "Initialized vector retrieval capability for CoreContextAgent"
+ )
- # Create callback handler for planning with queue
- callback_handler = StreamingCallbackHandler(queue=asyncio.Queue())
+ async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
+ """Evaluate the proposal against DAO core mission and standards."""
+ self._initialize_vector_capability()
- # Initialize all parent classes including PlanningCapability
- super().__init__(model_name=model_name, temperature=temperature, **kwargs)
- PlanningCapability.__init__(
- self,
- callback_handler=callback_handler,
- planning_llm=planning_llm,
- persona="You are a DAO proposal evaluation planner, focused on creating structured evaluation plans.",
- )
+ proposal_id = state.get("proposal_id", "unknown")
+ proposal_content = state.get("proposal_data", "")
- self.collection_names = collection_names or [
- "knowledge_collection",
- "proposals",
- ]
- self.required_fields = ["proposal_id", "proposal_data"]
- self.logger.debug(
- f"Initialized workflow: collections={self.collection_names} | model={model_name} | temperature={temperature}"
- )
+ dao_mission_text = self.config.get("dao_mission", "")
+ if not dao_mission_text:
+ try:
+ self.logger.debug(
+ f"[DEBUG:CoreAgent:{proposal_id}] Attempting to retrieve DAO mission from vector store"
+ )
+ dao_mission = await self.retrieve_from_vector_store(
+ query="DAO mission statement and values",
+ collection_name=self.config.get(
+ "mission_collection", "dao_documents"
+ ),
+ limit=3,
+ )
+ dao_mission_text = "\n".join([doc.page_content for doc in dao_mission])
+ self.logger.debug(
+ f"[DEBUG:CoreAgent:{proposal_id}] Retrieved DAO mission, length: {len(dao_mission_text)}"
+ )
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:CoreAgent:{proposal_id}] Error retrieving DAO mission: {str(e)}",
+ exc_info=True,
+ )
+ dao_mission_text = "Elevate human potential through AI on Bitcoin"
+ self.logger.debug(
+ f"[DEBUG:CoreAgent:{proposal_id}] Using default DAO mission: {dao_mission_text}"
+ )
- def _create_prompt(self) -> PromptTemplate:
- """Create the evaluation prompt template."""
- return PromptTemplate(
- input_variables=[
- "proposal_data",
- "dao_info",
- "treasury_balance",
- "contract_source",
- "agent_prompts",
- "vector_context",
- "recent_tweets",
- "web_search_results",
- ],
- template="""
- You are a skeptical and hard-to-convince DAO proposal evaluator. Your primary goal is rigorous analysis. Your task is to analyze the proposal and determine whether to vote FOR or AGAINST it based on verifiable evidence and alignment with DAO principles.
-
-
-
- {agent_prompts}
-
-
- If no agent-specific instructions are provided, apply these DEFAULT instructions:
- - Approve ONLY if the proposal provides verifiable evidence (URL, transaction hash, IPFS CID for screenshots/documents) for its claims OR if it's a purely logistical matter (e.g., scheduling reminder).
- - All other proposals lacking verifiable evidence for claims should be REJECTED (vote AGAINST) with LOW confidence (0.3-0.4 band).
- - Reject proposals making promises about future DAO actions or events unless they provide on-chain evidence of a corresponding approved governance decision or multisig transaction proposal.
- - CRITICAL: You MUST evaluate all proposal content (text, images, links) as ONE COHESIVE UNIT. If ANY image or attachment doesn't align with or support the proposal, contains misleading information, or is inappropriate, you MUST reject the entire proposal.
-
- You MUST explain how each specific instruction (agent-provided or default) influenced your decision, especially if it led to rejection.
-
-
-
-
-
- Verify smart contract security measures
- Check for potential vulnerabilities in contract logic
- Assess potential attack vectors
- Evaluate access control mechanisms
-
-
- Analyze alignment with DAO mission statement
- Verify compatibility with existing DAO infrastructure
- Check adherence to DAO's established governance principles
-
-
- Evaluate potential risks vs. rewards
- Assess short-term and long-term implications
- Consider effects on DAO reputation and stakeholders
-
-
-
-
- Validate all proposed parameters against acceptable ranges
- Verify parameter compatibility with existing systems
- Check for realistic implementation timelines
-
-
- Assess treasury impact and funding requirements
- Evaluate operational resource needs
- Consider opportunity costs against other initiatives
-
-
- Identify potential security implications of the action
- Check for unintended system vulnerabilities
-
-
- **Evidence Verification:** All claims MUST be backed by verifiable sources (URLs, transaction hashes, IPFS CIDs)
- **Future Commitments:** Any promises about future actions require on-chain proof of approved governance decisions
- **Content Cohesion:** All components (text, images, links) must form a cohesive, aligned whole supporting the proposal's intent
-
-
-
-
-
-
- {proposal_data}
-
-
- Note: If any images are provided with the proposal, they will be shown after this prompt.
- You should analyze any provided images in the context of the proposal and include your observations
- in your evaluation. Consider aspects such as:
- - Image content and relevance to the proposal
- - Any visual evidence supporting or contradicting the proposal
- - Quality and authenticity of the images
- - Potential security or privacy concerns in the images
-
- IMPORTANT: Images and text must form a cohesive whole. If any image:
- - Doesn't clearly support or relate to the proposal text
- - Contains misleading or contradictory information
- - Is of poor quality making verification impossible
- - Contains inappropriate content
- - Appears manipulated or false
- Then you MUST reject the entire proposal, regardless of the quality of the text portion.
-
-
-
-
- {vector_context}
-
-
- {recent_tweets}
-
-
- {web_search_results}
-
-
-
-
-
- {dao_info}
-
-
- {treasury_balance}
-
-
- Core Values: Curiosity, Truth Maximizing, Humanity's Best Interests, Transparency, Resilience, Collaboration
- Mission: Elevate human potential through Autonomous Intelligence on Bitcoin
- Guardrails: Decentralized Governance, Smart Contract accountability
-
-
-
-
-
- {contract_source}
-
-
-
-
-
- You MUST choose one of these confidence bands:
- - **0.9-1.0 (Very High Confidence - Strong Approve):** All criteria met excellently. Clear alignment with DAO mission/values, strong verifiable evidence provided for all claims, minimal/no security risks identified, significant positive impact expected, and adheres strictly to all instructions (including future promise verification). All images directly support the proposal with high quality and authenticity.
- - **0.7-0.8 (High Confidence - Approve):** Generally meets criteria well. Good alignment, sufficient verifiable evidence provided, risks identified but deemed manageable/acceptable, likely positive impact. Passes core checks (evidence, future promises). Minor reservations might exist but don't fundamentally undermine the proposal. Images support the proposal appropriately.
- - **0.5-0.6 (Moderate Confidence - Borderline/Weak Approve):** Meets minimum criteria but with notable reservations. Alignment is present but perhaps weak or indirect, evidence meets minimum verification but might be incomplete or raise minor questions, moderate risks identified requiring monitoring, impact is unclear or modest. *Could apply to simple logistical proposals with no major claims.* Any included images are relevant though may not provide strong support.
- - **0.3-0.4 (Low Confidence - Reject):** Fails one or more key criteria. Significant misalignment, **lacks required verifiable evidence** for claims (triggering default rejection), unacceptable risks identified, potential negative impact, or **contains unsubstantiated future promises**. Images may be missing where needed, irrelevant, or only weakly supportive. *This is the default band for rejections due to lack of evidence or unproven future commitments.*
- - **0.0-0.2 (Extremely Low Confidence - Strong Reject):** Fails multiple critical criteria. Clear violation of DAO principles/guardrails, major security flaws identified, evidence is demonstrably false or misleading, significant negative impact is highly likely or certain. Any included images may be misleading, manipulated, inappropriate, or contradictory to the proposal.
-
-
-
-
- Your evaluation must uphold clarity, reasoning, and respect for the DAO's voice:
- • Be clear and specific — avoid vagueness or filler
- • Use a consistent tone, but reflect the DAO's personality if known
- • Avoid casual throwaway phrases, sarcasm, or hype
- • Don't hedge — take a position and justify it clearly
- • Make every point logically sound and backed by facts or context
- • Cite relevant parts of the proposal, DAO mission, or prior actions
- • Use terms accurately — don't fake precision
- • Keep structure clean and easy to follow
- • Include analysis of any provided images and their implications
- • Specifically address image-text cohesion in your analysis
- • If rejecting, CLEARLY state the specific reason(s) based on the instructions or evaluation criteria (e.g., "Rejected due to lack of verifiable source for claim X", "Rejected because future promise lacks on-chain evidence", "Rejected because included image contradicts proposal text").
-
-
-
- Provide your evaluation in this exact JSON format:
- ```json
- {{
- "approve": boolean, // true for FOR, false for AGAINST
- "confidence_score": float, // MUST be from the confidence bands above
- "reasoning": string // Brief, professional explanation addressing:
- // 1. How agent/default instructions were applied (state which).
- // 2. Specific reason for rejection if applicable, referencing the unmet criteria or instruction.
- // 3. How DAO context influenced decision.
- // 4. How AIBTC Charter alignment was considered.
- // 5. Key factors in confidence score selection.
- // 6. Analysis of any provided images and their cohesion with proposal text.
- // Must be clear, precise, and well-structured.
- }}
- ```
-
+ prompt = PromptTemplate(
+ input_variables=["proposal_data", "dao_mission"],
+ template="""Evaluate the following proposal against the DAO's mission and values.\\n
+Proposal: {proposal_data}\\nDAO Mission: {dao_mission}\\n
+Assess whether this proposal aligns with the DAO's core mission and values.\\nConsider:\\n1. Mission Alignment: Does it directly support the stated mission?\\n2. Quality Standards: Does it meet quality requirements?\\n3. Innovation: Does it bring new ideas aligned with our vision?\\n4. Impact: How significant is its potential contribution?\\n
+**Image Analysis Instructions:**
+If images are provided with this proposal (they will appear after this text), you MUST analyze them as an integral part of the proposal.
+- Relevance: Does each image directly relate to and support the proposal's text?
+- Evidence: Do the images provide visual evidence for claims made in the proposal?
+- Authenticity & Quality: Are the images clear, authentic, and not misleading or manipulated?
+- Cohesion: The images and text MUST form a cohesive and consistent whole. If any image contradicts the text, is irrelevant, misleading, of very poor quality, or inappropriate, you should consider this a significant flaw in the proposal.
+
+Provide a score from 0-100, flag any critical issues (including image-related ones), and summarize your findings, explicitly mentioning your image analysis if images were present.\\
""",
)
- def _create_graph(self) -> Graph:
- """Create the evaluation graph."""
- prompt = self._create_prompt()
+ try:
+ self.logger.debug(
+ f"[DEBUG:CoreAgent:{proposal_id}] Formatting prompt for evaluation"
+ )
+ formatted_prompt_text = prompt.format(
+ proposal_data=proposal_content,
+ dao_mission=dao_mission_text
+ or "Elevate human potential through AI on Bitcoin",
+ )
+ debug_level = self.config.get("debug_level", 0)
+ if debug_level >= 2:
+ self.logger.debug(
+ f"[PROPOSAL_DEBUG:CoreAgent] FULL EVALUATION PROMPT:\n{formatted_prompt_text}"
+ )
+ else:
+ self.logger.debug(
+ f"[PROPOSAL_DEBUG:CoreAgent] Generated evaluation prompt: {formatted_prompt_text}"
+ )
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:CoreAgent:{proposal_id}] Error formatting prompt: {str(e)}",
+ exc_info=True,
+ )
+ formatted_prompt_text = f"Evaluate proposal: {proposal_content}"
+
+ try:
+ self.logger.debug(
+ f"[DEBUG:CoreAgent:{proposal_id}] Invoking LLM for core evaluation"
+ )
+
+ # ADDED: Image handling
+ proposal_images_list = state.get("proposal_images", [])
+ if not isinstance(proposal_images_list, list):
+ self.logger.warning(
+ f"[DEBUG:CoreAgent:{proposal_id}] proposal_images is not a list: {type(proposal_images_list)}. Defaulting to empty list."
+ )
+ proposal_images_list = []
- async def fetch_context(state: EvaluationState) -> EvaluationState:
- """Fetch context including web search, vector results, tweets, and contract source."""
- try:
- # --- Fetch Core Data --- #
- proposal_id = state["proposal_id"]
- dao_id = state.get("dao_id")
- agent_id = state.get("agent_id")
-
- # Get proposal data
- proposal_data = backend.get_proposal(proposal_id)
- if not proposal_data:
- raise ValueError(f"Proposal {proposal_id} not found")
-
- image_urls = extract_image_urls(proposal_data.parameters)
-
- # Process and encode images
- proposal_images = []
- for url in image_urls:
- try:
- async with httpx.AsyncClient() as client:
- response = await client.get(url, timeout=10.0)
- if response.status_code == 200:
- image_data = base64.b64encode(response.content).decode(
- "utf-8"
- )
- # Determine MIME type based on URL extension
- mime_type = (
- "image/jpeg"
- if url.lower().endswith((".jpg", ".jpeg"))
- else (
- "image/png"
- if url.lower().endswith(".png")
- else (
- "image/gif"
- if url.lower().endswith(".gif")
- else (
- "image/webp"
- if url.lower().endswith(".webp")
- else "image/png"
- )
- )
- ) # default to PNG if unknown
- )
- proposal_images.append(
- {
- "type": "image_url",
- "image_url": {
- "url": f"data:{mime_type};base64,{image_data}"
- },
- }
- )
- else:
- logger.warning(
- f"Failed to fetch image: {url} (status {response.status_code})"
- )
- except Exception as e:
- logger.error(
- f"Error fetching image {url}: {str(e)}", exc_info=True
- )
+ message_content_list = [{"type": "text", "text": formatted_prompt_text}]
+ if proposal_images_list:
+ self.logger.debug(
+ f"[DEBUG:CoreAgent:{proposal_id}] Adding {len(proposal_images_list)} images to LLM input."
+ )
+ message_content_list.extend(proposal_images_list)
+
+ llm_input_message = HumanMessage(content=message_content_list)
+
+ result = await self.llm.with_structured_output(AgentOutput).ainvoke(
+ [llm_input_message]
+ )
+ self.logger.debug(
+ f"[DEBUG:CoreAgent:{proposal_id}] LLM returned core evaluation with score: {result.score}"
+ )
+ self.logger.info(
+ f"[DEBUG:CoreAgent:{proposal_id}] SCORE={result.score}/100 | FLAGS={result.flags} | SUMMARY={result.summary}"
+ )
+
+ # Track token usage - extract directly from LLM if available
+ token_usage_data = {
+ "input_tokens": 0,
+ "output_tokens": 0,
+ "total_tokens": 0,
+ }
- state["proposal_images"] = proposal_images
-
- # Convert proposal data to dictionary
- proposal_dict = {
- "proposal_id": proposal_data.proposal_id,
- "parameters": proposal_data.parameters,
- "action": proposal_data.action,
- "caller": proposal_data.caller,
- "contract_principal": proposal_data.contract_principal,
- "creator": proposal_data.creator,
- "created_at_block": proposal_data.created_at_block,
- "end_block": proposal_data.end_block,
- "start_block": proposal_data.start_block,
- "liquid_tokens": proposal_data.liquid_tokens,
- "type": proposal_data.type,
- "proposal_contract": proposal_data.proposal_contract,
+ # Use the Annotated operator.add feature by assigning 1 to increment
+ # This is safe with concurrent execution
+ state["core_agent_invocations"] = 1
+
+ # Try to extract token usage directly from LLM response
+ if (
+ hasattr(self.llm, "_last_prompt_id")
+ and hasattr(self.llm, "client")
+ and hasattr(self.llm.client, "usage_by_prompt_id")
+ ):
+ last_prompt_id = self.llm._last_prompt_id
+ if last_prompt_id in self.llm.client.usage_by_prompt_id:
+ usage = self.llm.client.usage_by_prompt_id[last_prompt_id]
+ token_usage_data = {
+ "input_tokens": usage.get("prompt_tokens", 0),
+ "output_tokens": usage.get("completion_tokens", 0),
+ "total_tokens": usage.get("total_tokens", 0),
+ }
+ self.logger.debug(
+ f"[DEBUG:CoreAgent:{proposal_id}] Extracted token usage from LLM: {token_usage_data}"
+ )
+ # Fallback to estimation
+ if token_usage_data["total_tokens"] == 0:
+ # Get model name from LLM
+ llm_model_name = getattr(self.llm, "model_name", "gpt-4.1")
+ # First calculate token count from the text
+ token_count = len(formatted_prompt_text) // 4 # Simple estimation
+ # Create token usage dictionary for calculate_token_cost
+ token_usage_dict = {"input_tokens": token_count}
+ # Calculate cost
+ cost_result = calculate_token_cost(token_usage_dict, llm_model_name)
+ token_usage_data = {
+ "input_tokens": token_count,
+ "output_tokens": len(result.model_dump_json())
+ // 4, # rough estimate
+ "total_tokens": token_count + len(result.model_dump_json()) // 4,
}
- state["proposal_data"] = proposal_dict # Update state with full data
-
- # Get DAO info (if dao_id wasn't passed explicitly, use proposal's)
- if not dao_id and proposal_data.dao_id:
- dao_id = proposal_data.dao_id
- state["dao_id"] = dao_id # Update state if derived
-
- dao_info = None
- if dao_id:
- dao_info = backend.get_dao(dao_id)
- if not dao_info:
- raise ValueError(f"DAO Information not found for ID: {dao_id}")
- state["dao_info"] = dao_info.model_dump()
-
- # Get agent prompts
- agent_prompts_text = []
- if agent_id:
- try:
- prompts = backend.list_prompts(
- PromptFilter(
- agent_id=agent_id,
- dao_id=dao_id,
- is_active=True,
- )
- )
- agent_prompts_text = [p.prompt_text for p in prompts]
- except Exception as e:
- self.logger.error(
- f"Failed to get agent prompts: {str(e)}", exc_info=True
- )
- state["agent_prompts"] = agent_prompts_text
+ self.logger.debug(
+ f"[DEBUG:CoreAgent:{proposal_id}] Estimated token usage: {token_usage_data}"
+ )
- # Get treasury balance
- treasury_balance = None
- try:
- treasury_extensions = backend.list_extensions(
- ExtensionFilter(dao_id=dao_info.id, type="EXTENSIONS_TREASURY")
- )
- if treasury_extensions:
- hiro_api = HiroApi()
- treasury_balance = hiro_api.get_address_balance(
- treasury_extensions[0].contract_principal
- )
- else:
- self.logger.warning(
- f"No treasury extension for DAO {dao_info.id}"
- )
- except Exception as e:
- self.logger.error(
- f"Failed to get treasury balance: {str(e)}", exc_info=True
+ # Add token usage to state
+ if "token_usage" not in state:
+ state["token_usage"] = {}
+ state["token_usage"]["core_agent"] = token_usage_data
+
+ result_dict = result.model_dump()
+ # Update state with the result
+ update_state_with_agent_result(state, result_dict, "core")
+ return result_dict
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:CoreAgent:{proposal_id}] Error in core evaluation: {str(e)}",
+ exc_info=True,
+ )
+ fallback_score_dict = {
+ "score": 50,
+ "flags": [f"Error: {str(e)}"],
+ "summary": "Evaluation failed due to error",
+ }
+ self.logger.info(
+ f"[DEBUG:CoreAgent:{proposal_id}] ERROR_SCORE=50/100 | FLAGS=[{str(e)}] | SUMMARY=Evaluation failed"
+ )
+ return fallback_score_dict
+
+
+class HistoricalContextAgent(BaseCapabilityMixin, VectorRetrievalCapability):
+ """Historical Context Agent examines past proposals and patterns."""
+
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
+ BaseCapabilityMixin.__init__(self, config=config, state_key="historical_score")
+ VectorRetrievalCapability.__init__(self)
+ self.initialize()
+ self._initialize_vector_capability()
+
+ def _initialize_vector_capability(self):
+ if not hasattr(self, "retrieve_from_vector_store"):
+ self.retrieve_from_vector_store = (
+ VectorRetrievalCapability.retrieve_from_vector_store.__get__(
+ self, self.__class__
+ )
+ )
+ self.logger.info(
+ "Initialized vector retrieval capability for HistoricalContextAgent"
+ )
+
+ async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
+ proposal_id = state.get("proposal_id", "unknown")
+ self._initialize_vector_capability()
+ proposal_content = state.get("proposal_data", "")
+
+ historical_text = ""
+ try:
+ self.logger.debug(
+ f"[DEBUG:HistoricalAgent:{proposal_id}] Searching for similar proposals: {proposal_content[:50]}..."
+ )
+ similar_proposals = await self.retrieve_from_vector_store(
+ query=f"Proposals similar to: {proposal_content}",
+ collection_name=self.config.get(
+ "proposals_collection", "past_proposals"
+ ),
+ limit=5,
+ )
+ historical_text = "\n".join([doc.page_content for doc in similar_proposals])
+ self.logger.debug(
+ f"[DEBUG:HistoricalAgent:{proposal_id}] Found {len(similar_proposals)} similar proposals"
+ )
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:HistoricalAgent:{proposal_id}] Error retrieving historical proposals: {str(e)}",
+ exc_info=True,
+ )
+ historical_text = "No similar historical proposals found."
+ prompt = PromptTemplate(
+ input_variables=["proposal_data", "historical_proposals"],
+ template="""Analyze this proposal in the context of historical patterns and similar past proposals.\\n
+Current Proposal: {proposal_data}\\nSimilar Past Proposals: {historical_proposals}\\n
+Evaluate:\\n1. Precedent: Have similar proposals been approved or rejected?\\n2. Cross-DAO Similarities: How does this compare to proposals in similar DAOs?\\n3. Learning from Past: Does it address issues from past proposals?\\n4. Uniqueness: Is this novel or repeating past ideas?\\n
+**Image Analysis Instructions:**
+If images are provided with this proposal (they will appear after this text), you MUST analyze them as an integral part of the proposal.
+- Relevance: Does each image directly relate to and support the proposal's text?
+- Evidence: Do the images provide visual evidence for claims made in the proposal?
+- Authenticity & Quality: Are the images clear, authentic, and not misleading or manipulated?
+- Cohesion: The images and text MUST form a cohesive and consistent whole. If any image contradicts the text, is irrelevant, misleading, of very poor quality, or inappropriate, you should consider this a significant flaw in the proposal.
+
+Provide a score from 0-100, flag any critical issues (including image-related ones), and summarize your findings, explicitly mentioning your image analysis if images were present.\\
+ """,
+ )
+ try:
+ self.logger.debug(
+ f"[DEBUG:HistoricalAgent:{proposal_id}] Formatting prompt"
+ )
+ formatted_prompt_text = prompt.format(
+ proposal_data=proposal_content,
+ historical_proposals=historical_text
+ or "No similar historical proposals found.",
+ )
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:HistoricalAgent:{proposal_id}] Error formatting prompt: {str(e)}",
+ exc_info=True,
+ )
+ formatted_prompt_text = f"Analyze proposal: {proposal_content}"
+ try:
+ self.logger.debug(
+ f"[DEBUG:HistoricalAgent:{proposal_id}] Invoking LLM for historical evaluation"
+ )
+
+ # ADDED: Image handling
+ proposal_images_list = state.get("proposal_images", [])
+ if not isinstance(proposal_images_list, list):
+ self.logger.warning(
+ f"[DEBUG:HistoricalAgent:{proposal_id}] proposal_images is not a list: {type(proposal_images_list)}. Defaulting to empty list."
+ )
+ proposal_images_list = []
+
+ message_content_list = [{"type": "text", "text": formatted_prompt_text}]
+ if proposal_images_list:
+ self.logger.debug(
+ f"[DEBUG:HistoricalAgent:{proposal_id}] Adding {len(proposal_images_list)} images to LLM input."
+ )
+ message_content_list.extend(proposal_images_list)
+
+ llm_input_message = HumanMessage(content=message_content_list)
+
+ result = await self.llm.with_structured_output(AgentOutput).ainvoke(
+ [llm_input_message]
+ )
+ self.logger.info(
+ f"[DEBUG:HistoricalAgent:{proposal_id}] SCORE={result.score}/100 | FLAGS={result.flags} | SUMMARY={result.summary}"
+ )
+
+ # Track token usage - extract directly from LLM if available
+ token_usage_data = {
+ "input_tokens": 0,
+ "output_tokens": 0,
+ "total_tokens": 0,
+ }
+
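+            # The exact-usage path relies on optional client attributes
+            # (_last_prompt_id / usage_by_prompt_id) that only some LLM wrappers
+            # expose; the hasattr checks keep it best-effort.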
+ # Try to extract token usage directly from LLM response
+ if (
+ hasattr(self.llm, "_last_prompt_id")
+ and hasattr(self.llm, "client")
+ and hasattr(self.llm.client, "usage_by_prompt_id")
+ ):
+ last_prompt_id = self.llm._last_prompt_id
+ if last_prompt_id in self.llm.client.usage_by_prompt_id:
+ usage = self.llm.client.usage_by_prompt_id[last_prompt_id]
+ token_usage_data = {
+ "input_tokens": usage.get("prompt_tokens", 0),
+ "output_tokens": usage.get("completion_tokens", 0),
+ "total_tokens": usage.get("total_tokens", 0),
+ }
+ self.logger.debug(
+ f"[DEBUG:HistoricalAgent:{proposal_id}] Extracted token usage from LLM: {token_usage_data}"
)
- state["treasury_balance"] = treasury_balance
- # --- End Fetch Core Data --- #
+ # Fallback to estimation
+ if token_usage_data["total_tokens"] == 0:
+ # Get model name from LLM
+ llm_model_name = getattr(self.llm, "model_name", "gpt-4.1")
+ # First calculate token count from the text
+ token_count = len(formatted_prompt_text) // 4 # Simple estimation
+ # Create token usage dictionary for calculate_token_cost
+ token_usage_dict = {"input_tokens": token_count}
+ # Calculate cost
+ cost_result = calculate_token_cost(token_usage_dict, llm_model_name)
+ token_usage_data = {
+ "input_tokens": token_count,
+ "output_tokens": len(result.model_dump_json())
+ // 4, # rough estimate
+ "total_tokens": token_count + len(result.model_dump_json()) // 4,
+ }
+ self.logger.debug(
+ f"[DEBUG:HistoricalAgent:{proposal_id}] Estimated token usage: {token_usage_data}"
+ )
+
+ # Add token usage to state
+ if "token_usage" not in state:
+ state["token_usage"] = {}
+ state["token_usage"]["historical_agent"] = token_usage_data
+
+ result_dict = result.model_dump()
+ # Update state with the result
+ update_state_with_agent_result(state, result_dict, "historical")
+ return result_dict
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:HistoricalAgent:{proposal_id}] Error in historical evaluation: {str(e)}",
+ exc_info=True,
+ )
+ fallback_score_dict = {
+ "score": 50,
+ "flags": [f"Error: {str(e)}"],
+ "summary": "Evaluation failed due to error",
+ }
+ self.logger.info(
+ f"[DEBUG:HistoricalAgent:{proposal_id}] ERROR_SCORE=50/100 | FLAGS=[{str(e)}] | SUMMARY=Evaluation failed"
+ )
+ return fallback_score_dict
- # Use mixin capabilities for web search and vector retrieval
- web_search_query = f"DAO proposal {proposal_dict.get('type', 'unknown')} - {proposal_dict.get('parameters', '')}"
- # Fetch web search results and token usage
- web_search_results, web_search_token_usage = await self.search_web(
- query=web_search_query,
- search_context_size="medium",
+
+
+class FinancialContextAgent(BaseCapabilityMixin):
+ """Financial Context Agent evaluates treasury impact and financial viability."""
+
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
+ super().__init__(config=config, state_key="financial_score")
+ self.initialize()
+
+ async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
+ proposal_id = state.get("proposal_id", "unknown")
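+        # Treasury balance comes from workflow state when available, otherwise a
+        # configured default, so the prompt always has a figure to reference.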
+ treasury_balance = state.get(
+ "treasury_balance", self.config.get("treasury_balance", 1000000)
+ )
+ proposal_content = state.get("proposal_data", "")
+
+ prompt = PromptTemplate(
+ input_variables=["proposal_data", "treasury_balance"],
+ template="""Assess the financial aspects of this proposal.\\n
+Proposal: {proposal_data}\\nCurrent Treasury Balance: {treasury_balance}\\n
+Evaluate:\\n1. Cost-Benefit Analysis: Is the ROI reasonable?\\n2. Treasury Impact: What percentage of treasury would this use?\\n3. Budget Alignment: Does it align with budget priorities?\\n4. Projected Impact: What's the expected financial outcome?\\n5. Risk Assessment: What financial risks might arise?\\n
+**Image Analysis Instructions:**
+If images are provided with this proposal (they will appear after this text), you MUST analyze them as an integral part of the proposal.
+- Relevance: Does each image directly relate to and support the proposal's text?
+- Evidence: Do the images provide visual evidence for claims made in the proposal (e.g., screenshots of transactions, diagrams of financial models if applicable)?
+- Authenticity & Quality: Are the images clear, authentic, and not misleading or manipulated?
+- Cohesion: The images and text MUST form a cohesive and consistent whole. If any image contradicts the text, is irrelevant, misleading, of very poor quality, or inappropriate, you should consider this a significant flaw in the proposal.
+
+Provide a score from 0-100, flag any critical issues (including image-related ones), and summarize your findings, explicitly mentioning your image analysis if images were present.\\
+ """,
+ )
+ try:
+ self.logger.debug(
+ f"[DEBUG:FinancialAgent:{proposal_id}] Formatting prompt for financial evaluation"
+ )
+ formatted_prompt_text = prompt.format(
+ proposal_data=proposal_content,
+ treasury_balance=treasury_balance,
+ )
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:FinancialAgent:{proposal_id}] Error formatting prompt: {str(e)}",
+ exc_info=True,
+ )
+ formatted_prompt_text = (
+ f"Assess financial aspects of proposal: {proposal_content}"
+ )
+ try:
+ self.logger.debug(
+ f"[DEBUG:FinancialAgent:{proposal_id}] Invoking LLM for financial evaluation"
+ )
+
+            # Gather any processed proposal images from state
+ proposal_images = state.get("proposal_images", [])
+ message_content_list = [{"type": "text", "text": formatted_prompt_text}]
+ if proposal_images:
+                self.logger.debug(
+ f"[DEBUG:FinancialAgent:{proposal_id}] Adding {len(proposal_images)} images to LLM input."
)
- state["web_search_results"] = web_search_results
- state["web_search_token_usage"] = web_search_token_usage
- # Store web search model info (assuming gpt-4.1 as used in mixin)
- state["web_search_model_info"] = {
- "name": "gpt-4.1",
- "temperature": None,
- }
+ message_content_list.extend(proposal_images)
+
+ llm_input_message = HumanMessage(content=message_content_list)
+
+ result = await self.llm.with_structured_output(AgentOutput).ainvoke(
+ [llm_input_message]
+ )
+ self.logger.info(
+ f"[DEBUG:FinancialAgent:{proposal_id}] SCORE={result.score}/100 | FLAGS={result.flags} | SUMMARY={result.summary}"
+ )
+
+ # Track token usage - extract directly from LLM if available
+ token_usage_data = {
+ "input_tokens": 0,
+ "output_tokens": 0,
+ "total_tokens": 0,
+ }
- vector_search_query = f"Proposal type: {proposal_dict.get('type')} - {proposal_dict.get('parameters', '')}"
- state["vector_results"] = await self.retrieve_from_vector_store(
- query=vector_search_query, limit=5
+ # Try to extract token usage directly from LLM response
+ if (
+ hasattr(self.llm, "_last_prompt_id")
+ and hasattr(self.llm, "client")
+ and hasattr(self.llm.client, "usage_by_prompt_id")
+ ):
+ last_prompt_id = self.llm._last_prompt_id
+ if last_prompt_id in self.llm.client.usage_by_prompt_id:
+ usage = self.llm.client.usage_by_prompt_id[last_prompt_id]
+ token_usage_data = {
+ "input_tokens": usage.get("prompt_tokens", 0),
+ "output_tokens": usage.get("completion_tokens", 0),
+ "total_tokens": usage.get("total_tokens", 0),
+ }
+ self.logger.debug(
+ f"[DEBUG:FinancialAgent:{proposal_id}] Extracted token usage from LLM: {token_usage_data}"
+ )
+ # Fallback to estimation
+ if token_usage_data["total_tokens"] == 0:
+ # Get model name from LLM
+ llm_model_name = getattr(self.llm, "model_name", "gpt-4.1")
+ # First calculate token count from the text
+ token_count = len(formatted_prompt_text) // 4 # Simple estimation
+ # Create token usage dictionary for calculate_token_cost
+ token_usage_dict = {"input_tokens": token_count}
+ # Calculate cost
+ cost_result = calculate_token_cost(token_usage_dict, llm_model_name)
+ token_usage_data = {
+ "input_tokens": token_count,
+ "output_tokens": len(result.model_dump_json())
+ // 4, # rough estimate
+ "total_tokens": token_count + len(result.model_dump_json()) // 4,
+ }
+ self.logger.debug(
+ f"[DEBUG:FinancialAgent:{proposal_id}] Estimated token usage: {token_usage_data}"
)
- # Fetch recent tweets
- recent_tweets = []
- if dao_id:
- try:
- self.logger.debug(f"Fetching tweets for DAO ID: {dao_id}")
- queue_messages = backend.list_queue_messages(
- QueueMessageFilter(
- type=QueueMessageType.TWEET,
- dao_id=dao_id,
- is_processed=True,
- )
- )
- sorted_messages = sorted(
- queue_messages, key=lambda x: x.created_at, reverse=True
- )[:5]
- recent_tweets = [
- {
- "created_at": msg.created_at,
- "message": (
- msg.message.get("message", "No text available")
- if isinstance(msg.message, dict)
- else msg.message
- ),
- "tweet_id": msg.tweet_id,
- }
- for msg in sorted_messages
- ]
- except Exception as e:
- self.logger.error(
- f"Failed to fetch tweets: {str(e)}", exc_info=True
- )
- state["recent_tweets"] = recent_tweets
-
- # Fetch contract source for core proposals
- contract_source = ""
- if proposal_dict.get("type") == ProposalType.CORE and proposal_dict.get(
- "proposal_contract"
- ):
- parts = proposal_dict["proposal_contract"].split(".")
- if len(parts) >= 2:
- try:
- api = HiroApi()
- result = api.get_contract_source(parts[0], parts[1])
- contract_source = result.get("source", "")
- except Exception as e:
- self.logger.error(
- f"Failed to fetch contract source: {str(e)}",
- exc_info=True,
- )
- else:
- self.logger.warning(
- f"Invalid contract format: {proposal_dict['proposal_contract']}"
- )
- state["contract_source"] = contract_source
-
- # Validate proposal data structure (moved from entry point)
- proposal_type = proposal_dict.get("type")
- if proposal_type == ProposalType.ACTION and not proposal_dict.get(
- "parameters"
- ):
- raise ValueError("Action proposal missing parameters")
- if proposal_type == ProposalType.CORE and not proposal_dict.get(
- "proposal_contract"
- ):
- raise ValueError("Core proposal missing proposal_contract")
-
- return state
- except Exception as e:
- self.logger.error(f"Error in fetch_context: {str(e)}", exc_info=True)
- state["reasoning"] = f"Error fetching context: {str(e)}"
- # Propagate error state
- return state
-
- async def format_evaluation_prompt(state: EvaluationState) -> EvaluationState:
- """Format the evaluation prompt using the fetched context."""
- if "reasoning" in state and "Error" in state["reasoning"]:
- return state # Skip if context fetching failed
- try:
- # Extract data from state for easier access
- proposal_data = state["proposal_data"]
- dao_info = state.get("dao_info", {})
- treasury_balance = state.get("treasury_balance")
- contract_source = state.get("contract_source", "")
- agent_prompts = state.get("agent_prompts", [])
- vector_results = state.get("vector_results", [])
- recent_tweets = state.get("recent_tweets", [])
- web_search_results = state.get("web_search_results", [])
-
- # Format agent prompts
- agent_prompts_str = "No agent-specific instructions available."
- if agent_prompts:
- if isinstance(agent_prompts, list):
- agent_prompts_str = "\n\n".join(agent_prompts)
- else:
- self.logger.warning(
- f"Invalid agent prompts: {type(agent_prompts)}"
- )
+ # Add token usage to state
+ if "token_usage" not in state:
+ state["token_usage"] = {}
+ state["token_usage"]["financial_agent"] = token_usage_data
+
+ result_dict = result.model_dump()
+ # Update state with the result
+ update_state_with_agent_result(state, result_dict, "financial")
+ return result_dict
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:FinancialAgent:{proposal_id}] Error in financial evaluation: {str(e)}",
+ exc_info=True,
+ )
+ fallback_score_dict = {
+ "score": 50,
+ "flags": [f"Error: {str(e)}"],
+ "summary": "Evaluation failed due to error",
+ }
+ self.logger.info(
+ f"[DEBUG:FinancialAgent:{proposal_id}] ERROR_SCORE=50/100 | FLAGS=[{str(e)}] | SUMMARY=Evaluation failed"
+ )
+ return fallback_score_dict
- # Format web search results
- web_search_content = "No relevant web search results found."
- if web_search_results:
- # Create structured XML format for each web search result
- web_search_items = []
- for i, res in enumerate(web_search_results):
- source_url = (
- res.get("metadata", {})
- .get("source_urls", [{}])[0]
- .get("url", "Unknown")
- )
- web_search_items.append(
- f"\n{i+1}\n{res.get('page_content', '')}\n{source_url}\n"
- )
- web_search_content = "\n".join(web_search_items)
-
- # Format vector context
- vector_context = "No additional context available from vector store."
- if vector_results:
- # Create structured XML format for each vector result
- vector_items = []
- for i, doc in enumerate(vector_results):
- vector_items.append(
- f"\n{i+1}\n{doc.page_content}\n"
- )
- vector_context = "\n".join(vector_items)
-
- # Format recent tweets
- tweets_content = "No recent DAO tweets found."
- if recent_tweets:
- # Create structured XML format for each tweet
- tweet_items = []
- for i, tweet in enumerate(recent_tweets):
- tweet_items.append(
- f"\n{i+1}\n{tweet['created_at']}\n{tweet['message']}\n"
- )
- tweets_content = "\n".join(tweet_items)
-
- # Convert JSON objects to formatted text
- # Format proposal_data
- proposal_data_str = "No proposal data available."
- if proposal_data:
- proposal_data_str = "\n".join(
- [
- f"Proposal ID: {proposal_data.get('proposal_id', 'Unknown')}",
- f"Type: {proposal_data.get('type', 'Unknown')}",
- f"Action: {proposal_data.get('action', 'Unknown')}",
- f"Parameters: {proposal_data.get('parameters', 'None')}",
- f"Creator: {proposal_data.get('creator', 'Unknown')}",
- f"Contract Principal: {proposal_data.get('contract_principal', 'Unknown')}",
- f"Start Block: {proposal_data.get('start_block', 'Unknown')}",
- f"End Block: {proposal_data.get('end_block', 'Unknown')}",
- f"Created at Block: {proposal_data.get('created_at_block', 'Unknown')}",
- f"Liquid Tokens: {proposal_data.get('liquid_tokens', 'Unknown')}",
- ]
- )
- # Add proposal contract info if it exists
- if proposal_data.get("proposal_contract"):
- proposal_data_str += f"\nProposal Contract: {proposal_data.get('proposal_contract')}"
-
- # Format dao_info
- dao_info_str = "No DAO information available."
- if dao_info:
- dao_info_str = "\n".join(
- [
- f"DAO Name: {dao_info.get('name', 'Unknown')}",
- f"DAO Mission: {dao_info.get('mission', 'Unknown')}",
- f"DAO Description: {dao_info.get('description', 'Unknown')}",
- ]
- )
+
+
+class ImageProcessingNode(BaseCapabilityMixin):
+ """A workflow node to process proposal images: extract URLs, download, and base64 encode."""
- # Format treasury_balance
- treasury_balance_str = "Treasury balance information not available."
- if treasury_balance is not None:
- treasury_balance_str = (
- f"Current DAO Treasury Balance: {treasury_balance} STX"
- )
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
+ super().__init__(config=config, state_key="proposal_images")
+ self.initialize()
- formatted_prompt = prompt.format(
- proposal_data=proposal_data_str,
- dao_info=dao_info_str,
- treasury_balance=treasury_balance_str,
- contract_source=contract_source,
- agent_prompts=agent_prompts_str,
- vector_context=vector_context,
- recent_tweets=tweets_content,
- web_search_results=web_search_content,
- )
- state["formatted_prompt"] = formatted_prompt
- return state
- except Exception as e:
- self.logger.error(f"Error formatting prompt: {str(e)}", exc_info=True)
- state["reasoning"] = f"Error formatting prompt: {str(e)}"
- return state
-
- async def call_evaluation_llm(state: EvaluationState) -> EvaluationState:
- """Call the LLM with the formatted prompt for evaluation."""
- if "reasoning" in state and "Error" in state["reasoning"]:
- return state # Skip if previous steps failed
- try:
- # Prepare message content with text and images
- message_content = [{"type": "text", "text": state["formatted_prompt"]}]
+ async def process(self, state: ProposalEvaluationState) -> List[Dict[str, Any]]:
+ """The core logic for processing images, returns the list of processed image dicts directly."""
+ proposal_id = state.get("proposal_id", "unknown")
+ proposal_data_str = state.get("proposal_data", "")
- # Add any proposal images if they exist
- if state.get("proposal_images"):
- message_content.extend(state["proposal_images"])
+ if not proposal_data_str:
+ self.logger.info(
+ f"[ImageProcessorNode:{proposal_id}] No proposal_data string, skipping image processing."
+ )
+ return []
- # Create the message for the LLM
- message = HumanMessage(content=message_content)
+ self.logger.info(
+ f"[ImageProcessorNode:{proposal_id}] Starting image processing."
+ )
+ image_urls = extract_image_urls(proposal_data_str)
- structured_output = self.llm.with_structured_output(
- ProposalEvaluationOutput, include_raw=True
- )
- result: Dict[str, Any] = await structured_output.ainvoke([message])
+ if not image_urls:
+ self.logger.info(
+ f"[ImageProcessorNode:{proposal_id}] No image URLs found in proposal data."
+ )
+ return []
- parsed_result = result.get("parsed")
- if not isinstance(parsed_result, ProposalEvaluationOutput):
- # Attempt to handle cases where parsing might return the raw dict
- if isinstance(parsed_result, dict):
- parsed_result = ProposalEvaluationOutput(**parsed_result)
- else:
- raise TypeError(
- f"Expected ProposalEvaluationOutput or dict, got {type(parsed_result)}"
- )
+ self.logger.info(
+ f"[ImageProcessorNode:{proposal_id}] Found {len(image_urls)} image URLs: {image_urls}"
+ )
+
+ processed_images = []
+ async with httpx.AsyncClient() as client:
+ for url in image_urls:
+ try:
+ self.logger.debug(
+ f"[ImageProcessorNode:{proposal_id}] Downloading image from {url}"
+ )
+ response = await client.get(url, timeout=10.0)
+ response.raise_for_status()
+ image_data = base64.b64encode(response.content).decode("utf-8")
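+                        # Infer the MIME type from the URL extension, defaulting to
+                        # JPEG for unrecognized extensions.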
+ mime_type = "image/jpeg"
+ if url.lower().endswith((".jpg", ".jpeg")):
+ mime_type = "image/jpeg"
+ elif url.lower().endswith(".png"):
+ mime_type = "image/png"
+ elif url.lower().endswith(".gif"):
+ mime_type = "image/gif"
+ elif url.lower().endswith(".webp"):
+ mime_type = "image/webp"
+
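+                        # Package the image as an image_url content part carrying a
+                        # base64 data URL, the format used for multimodal chat messages.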
+ processed_images.append(
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:{mime_type};base64,{image_data}"
+ },
+ }
+ )
+ self.logger.debug(
+ f"[ImageProcessorNode:{proposal_id}] Successfully processed image from {url}"
+ )
+ except httpx.HTTPStatusError as e:
+ self.logger.error(
+ f"[ImageProcessorNode:{proposal_id}] HTTP error for {url}: {e.response.status_code}",
+ exc_info=False,
+ )
+ except httpx.RequestError as e:
+ self.logger.error(
+ f"[ImageProcessorNode:{proposal_id}] Request error for {url}: {str(e)}",
+ exc_info=False,
+ )
+ except Exception as e:
+ self.logger.error(
+ f"[ImageProcessorNode:{proposal_id}] Generic error for {url}: {str(e)}",
+ exc_info=True,
+ )
- model_info = {"name": self.model_name, "temperature": self.temperature}
- token_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+ self.logger.info(
+ f"[ImageProcessorNode:{proposal_id}] Finished. {len(processed_images)} images processed."
+ )
+ return processed_images
- raw_response = result.get("raw")
- if raw_response:
- if hasattr(raw_response, "usage_metadata"):
- token_usage = raw_response.usage_metadata
- else:
- self.logger.warning("Raw response missing usage_metadata")
- else:
- self.logger.warning("LLM result missing raw response data")
- state["approve"] = parsed_result.approve
- state["confidence_score"] = parsed_result.confidence_score
- state["reasoning"] = parsed_result.reasoning
- state["evaluation_token_usage"] = token_usage
- state["evaluation_model_info"] = model_info
+
+
+class SocialContextAgent(BaseCapabilityMixin, WebSearchCapability):
+ """Social Context Agent gauges community sentiment and social impact."""
+
+    def __init__(self, config: Optional[Dict[str, Any]] = None):
+ BaseCapabilityMixin.__init__(self, config=config, state_key="social_score")
+ WebSearchCapability.__init__(self)
+ self.initialize()
+ self._initialize_web_search_capability()
+
+ def _initialize_web_search_capability(self):
+ if not hasattr(self, "search_web"):
+ self.search_web = WebSearchCapability.search_web.__get__(
+ self, self.__class__
+ )
+ self.logger.info("Initialized web search capability for SocialContextAgent")
+
+ async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
+ proposal_id = state.get("proposal_id", "unknown")
+ self._initialize_web_search_capability()
+ proposal_content = state.get("proposal_data", "")
+
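+        # Optionally enrich the evaluation with a web search for community
+        # sentiment; any failure falls back to a placeholder context string.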
+ social_context = ""
+ if self.config.get("enable_web_search", True):
+ try:
+ search_query = (
+ f"Community sentiment {proposal_content[:50]} cryptocurrency DAO"
+ )
self.logger.debug(
- f"Evaluation step complete: Decision={'APPROVE' if parsed_result.approve else 'REJECT'} | Confidence={parsed_result.confidence_score:.2f}"
+ f"[DEBUG:SocialAgent:{proposal_id}] Performing web search: {search_query}"
+ )
+ search_results, web_search_token_usage = await self.search_web(
+ query=search_query,
+ num_results=3,
+ )
+ social_context = "\n".join(
+ [f"{r.get('page_content', '')}" for r in search_results]
+ )
+ self.logger.debug(
+ f"[DEBUG:SocialAgent:{proposal_id}] Found {len(search_results)} web search results"
)
- self.logger.debug(f"Full reasoning: {parsed_result.reasoning}")
- return state
+ # Store web search token usage
+ if "token_usage" not in state:
+ state["token_usage"] = {}
+ state["token_usage"]["social_web_search"] = web_search_token_usage
+
except Exception as e:
- self.logger.error(f"Error calling LLM: {str(e)}", exc_info=True)
- state["approve"] = False
- state["confidence_score"] = 0.0
- state["reasoning"] = f"Error during LLM evaluation: {str(e)}"
- return state
-
- # Create decision node
- async def should_vote(state: EvaluationState) -> str:
- """Decide whether to vote based on confidence threshold."""
- try:
- self.logger.debug(
- f"Deciding vote: auto_vote={state['auto_vote']} | confidence={state['confidence_score']} | threshold={state['confidence_threshold']}"
+                self.logger.error(
+ f"[DEBUG:SocialAgent:{proposal_id}] Web search failed: {str(e)}",
+ exc_info=True,
)
+ social_context = "Web search unavailable."
+ prompt = PromptTemplate(
+ input_variables=["proposal_data", "social_context"],
+ template="""Gauge the community sentiment and social impact of this proposal.\\n
+Proposal: {proposal_data}\\nSocial Context: {social_context}\\n
+Evaluate:\\n1. Community Sentiment: How might members perceive this?\\n2. Social Media Presence: Any discussions online about this?\\n3. Engagement Potential: Will this engage the community?\\n4. Cross-Platform Analysis: How does sentiment vary across platforms?\\n5. Social Risk: Any potential for controversy or division?\\n
+**Image Analysis Instructions:**
+If images are provided with this proposal (they will appear after this text), you MUST analyze them as an integral part of the proposal.
+- Relevance: Does each image directly relate to and support the proposal's text or the community/social aspects being discussed?
+- Evidence: Do the images provide visual evidence for claims made (e.g., screenshots of community discussions, mockups of social impact visuals)?
+- Authenticity & Quality: Are the images clear, authentic, and not misleading or manipulated?
+- Cohesion: The images and text MUST form a cohesive and consistent whole. If any image contradicts the text, is irrelevant, misleading, of very poor quality, or inappropriate, you should consider this a significant flaw in the proposal.
+
+Provide a score from 0-100, flag any critical issues (including image-related ones), and summarize your findings, explicitly mentioning your image analysis if images were present.\\
+ """,
+ )
+ try:
+ self.logger.debug(
+ f"[DEBUG:SocialAgent:{proposal_id}] Formatting prompt for social evaluation"
+ )
+ formatted_prompt_text = prompt.format(
+ proposal_data=proposal_content,
+ social_context=social_context,
+ )
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:SocialAgent:{proposal_id}] Error formatting prompt: {str(e)}",
+ exc_info=True,
+ )
+ formatted_prompt_text = (
+ f"Gauge social impact of proposal: {proposal_content}"
+ )
+ try:
+ self.logger.debug(
+ f"[DEBUG:SocialAgent:{proposal_id}] Invoking LLM for social evaluation"
+ )
+
+            # Gather any processed proposal images from state
+ proposal_images_list = state.get("proposal_images", [])
+ if not isinstance(proposal_images_list, list):
+ self.logger.warning(
+ f"[DEBUG:SocialAgent:{proposal_id}] proposal_images is not a list: {type(proposal_images_list)}. Defaulting to empty list."
+ )
+ proposal_images_list = []
- if not state["auto_vote"]:
- self.logger.debug("Auto-vote is disabled, skipping vote")
- return "skip_vote"
+ message_content_list = [{"type": "text", "text": formatted_prompt_text}]
+ if proposal_images_list:
+ self.logger.debug(
+ f"[DEBUG:SocialAgent:{proposal_id}] Adding {len(proposal_images_list)} images to LLM input."
+ )
+ message_content_list.extend(proposal_images_list)
+
+ llm_input_message = HumanMessage(content=message_content_list)
+
+ result = await self.llm.with_structured_output(AgentOutput).ainvoke(
+ [llm_input_message]
+ )
+ self.logger.info(
+ f"[DEBUG:SocialAgent:{proposal_id}] SCORE={result.score}/100 | FLAGS={result.flags} | SUMMARY={result.summary}"
+ )
+
+ # Track token usage - extract directly from LLM if available
+ token_usage_data = {
+ "input_tokens": 0,
+ "output_tokens": 0,
+ "total_tokens": 0,
+ }
- if state["confidence_score"] >= state["confidence_threshold"]:
+ # Try to extract token usage directly from LLM response
+ if (
+ hasattr(self.llm, "_last_prompt_id")
+ and hasattr(self.llm, "client")
+ and hasattr(self.llm.client, "usage_by_prompt_id")
+ ):
+ last_prompt_id = self.llm._last_prompt_id
+ if last_prompt_id in self.llm.client.usage_by_prompt_id:
+ usage = self.llm.client.usage_by_prompt_id[last_prompt_id]
+ token_usage_data = {
+ "input_tokens": usage.get("prompt_tokens", 0),
+ "output_tokens": usage.get("completion_tokens", 0),
+ "total_tokens": usage.get("total_tokens", 0),
+ }
self.logger.debug(
- f"Confidence score {state['confidence_score']} meets threshold {state['confidence_threshold']}, proceeding to vote"
+ f"[DEBUG:SocialAgent:{proposal_id}] Extracted token usage from LLM: {token_usage_data}"
)
- return "vote"
- else:
+ # Fallback to estimation
+ if token_usage_data["total_tokens"] == 0:
+ # Get model name from LLM
+ llm_model_name = getattr(self.llm, "model_name", "gpt-4.1")
+ # First calculate token count from the text
+ token_count = len(formatted_prompt_text) // 4 # Simple estimation
+ # Create token usage dictionary for calculate_token_cost
+ token_usage_dict = {"input_tokens": token_count}
+ # Calculate cost
+ cost_result = calculate_token_cost(token_usage_dict, llm_model_name)
+ token_usage_data = {
+ "input_tokens": token_count,
+ "output_tokens": len(result.model_dump_json())
+ // 4, # rough estimate
+ "total_tokens": token_count + len(result.model_dump_json()) // 4,
+ }
+ self.logger.debug(
+ f"[DEBUG:SocialAgent:{proposal_id}] Estimated token usage: {token_usage_data}"
+ )
+
+ # Add token usage to state
+ if "token_usage" not in state:
+ state["token_usage"] = {}
+ state["token_usage"]["social_agent"] = token_usage_data
+
+ result_dict = result.model_dump()
+ # Update state with the result
+ update_state_with_agent_result(state, result_dict, "social")
+ return result_dict
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:SocialAgent:{proposal_id}] Error in social evaluation: {str(e)}",
+ exc_info=True,
+ )
+ fallback_score_dict = {
+ "score": 50,
+ "flags": [f"Error: {str(e)}"],
+ "summary": "Evaluation failed due to error",
+ }
+ self.logger.info(
+ f"[DEBUG:SocialAgent:{proposal_id}] ERROR_SCORE=50/100 | FLAGS=[{str(e)}] | SUMMARY=Evaluation failed"
+ )
+ return fallback_score_dict
+
+
+class ReasoningAgent(BaseCapabilityMixin, PlanningCapability):
+ """Configuration & Reasoning Agent synthesizes evaluations and makes decisions."""
+
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
+ """Initialize the Reasoning Agent."""
+ BaseCapabilityMixin.__init__(self, config=config, state_key="final_score")
+ self.initialize()
+ planning_queue = asyncio.Queue()
+ callback_handler = self.config.get(
+ "callback_handler"
+ ) or StreamingCallbackHandler(planning_queue)
+ PlanningCapability.__init__(
+ self,
+ callback_handler=callback_handler,
+ planning_llm=ChatOpenAI(
+ model=self.config.get("planning_model", "gpt-4.1-mini")
+ ),
+ persona="DAO Proposal Evaluator",
+ )
+ self._initialize_planning_capability()
+
+ def _initialize_planning_capability(self):
+ """Initialize planning capability methods."""
+ if not hasattr(self, "create_plan"):
+ self.create_plan = PlanningCapability.create_plan.__get__(
+ self, self.__class__
+ )
+ self.logger.info("Initialized planning capability for ReasoningAgent")
+
+ def integrate_with_graph(self, graph: StateGraph, **kwargs) -> None:
+ """Integrate planning capability with the graph."""
+ pass
+
+ async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
+ proposal_id = state.get("proposal_id", "unknown")
+ self._initialize_planning_capability()
+ proposal_content = state.get("proposal_data", "")
+ self.logger.debug(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] Beginning final evaluation processing with proposal_content (length: {len(proposal_content)})"
+ )
+
+ def safe_get_score(value, default=0):
+ if isinstance(value, dict) and "score" in value:
+ return value.get("score", default)
+ elif isinstance(value, int):
+ return value
+ return default
+
+ core_score = state.get("core_score", {})
+ historical_score = state.get("historical_score", {})
+ financial_score = state.get("financial_score", {})
+ social_score = state.get("social_score", {})
+
+ core_score_val = safe_get_score(core_score)
+ historical_score_val = safe_get_score(historical_score)
+ financial_score_val = safe_get_score(financial_score)
+ social_score_val = safe_get_score(social_score)
+
+ self.logger.debug(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] Input scores: Core={core_score_val}, Historical={historical_score_val}, Financial={financial_score_val}, Social={social_score_val}"
+ )
+
+ scores = {
+ "Core Context": core_score_val,
+ "Historical Context": historical_score_val,
+ "Financial Context": financial_score_val,
+ "Social Context": social_score_val,
+ }
+ summaries = state.get("summaries", {})
+ flags = state.get("flags", [])
+
+ self.logger.debug(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] Summaries: {summaries}"
+ )
+
+ self.logger.debug(f"[DEBUG:ReasoningAgent:{proposal_id}] Flags raised: {flags}")
+
+ # Update the summaries with the content from each agent's evaluation
+ if isinstance(core_score, dict) and "summary" in core_score:
+ summaries["core_score"] = core_score["summary"]
+ if isinstance(historical_score, dict) and "summary" in historical_score:
+ summaries["historical_score"] = historical_score["summary"]
+ if isinstance(financial_score, dict) and "summary" in financial_score:
+ summaries["financial_score"] = financial_score["summary"]
+ if isinstance(social_score, dict) and "summary" in social_score:
+ summaries["social_score"] = social_score["summary"]
+
+ # Update flags
+ for score_obj in [core_score, historical_score, financial_score, social_score]:
+ if (
+ isinstance(score_obj, dict)
+ and "flags" in score_obj
+ and isinstance(score_obj["flags"], list)
+ ):
+ flags.extend(score_obj["flags"])
+
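+        # The synthesis prompt combines per-agent scores, summaries, and all
+        # accumulated flags so the final decision reflects every evaluation track.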
+ prompt = PromptTemplate(
+ input_variables=["proposal_data", "scores", "summaries", "flags"],
+ template="""Synthesize all evaluations and make a final decision on this proposal.\\n
+Proposal: {proposal_data}\\n
+Evaluations:\\n- Core Context (Score: {scores[Core Context]}): {summaries[core_score]}\\n- Historical Context (Score: {scores[Historical Context]}): {summaries[historical_score]}\\n- Financial Context (Score: {scores[Financial Context]}): {summaries[financial_score]}\\n- Social Context (Score: {scores[Social Context]}): {summaries[social_score]}\\n
+Flags Raised: {flags}\\n
+Synthesize these evaluations to:\\n1. Weigh the importance of each context\\n2. Calibrate confidence based on available information\\n3. Consider the implications of the flags raised\\n4. Make a final decision: Approve or Reject\\n5. Calculate an overall score\\n
+Provide a final score, decision (Approve/Reject), and detailed explanation.\\n
+ """,
+ )
+
+ try:
+ for key in [
+ "core_score",
+ "historical_score",
+ "financial_score",
+ "social_score",
+ ]:
+ if key not in summaries:
+ summaries[key] = "No evaluation available"
+
+ self.logger.debug(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] Formatting final evaluation prompt"
+ )
+ formatted_prompt_text = prompt.format(
+ proposal_data=proposal_content,
+ scores=scores,
+ summaries=summaries,
+ flags=", ".join(flags) if flags else "None",
+ )
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] Error formatting prompt: {str(e)}",
+ exc_info=True,
+ )
+ formatted_prompt_text = f"""Synthesize evaluations for proposal: {proposal_content}
+Scores: {scores}
+Flags: {flags}
+Provide a final score, decision (Approve/Reject), and explanation."""
+
+ try:
+ self.logger.debug(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] Invoking LLM for final decision"
+ )
+ result = await self.llm.with_structured_output(FinalOutput).ainvoke(
+ [formatted_prompt_text]
+ )
+
+ self.logger.info(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] FINAL DECISION: {result.decision} | SCORE={result.score}/100 | EXPLANATION={result.explanation}"
+ )
+
+ # Track token usage - extract directly from LLM if available
+ token_usage_data = {
+ "input_tokens": 0,
+ "output_tokens": 0,
+ "total_tokens": 0,
+ }
+
+ # Try to extract token usage directly from LLM response
+ if (
+ hasattr(self.llm, "_last_prompt_id")
+ and hasattr(self.llm, "client")
+ and hasattr(self.llm.client, "usage_by_prompt_id")
+ ):
+ last_prompt_id = self.llm._last_prompt_id
+ if last_prompt_id in self.llm.client.usage_by_prompt_id:
+ usage = self.llm.client.usage_by_prompt_id[last_prompt_id]
+ token_usage_data = {
+ "input_tokens": usage.get("prompt_tokens", 0),
+ "output_tokens": usage.get("completion_tokens", 0),
+ "total_tokens": usage.get("total_tokens", 0),
+ }
self.logger.debug(
- f"Confidence score {state['confidence_score']} below threshold {state['confidence_threshold']}, skipping vote"
+ f"[DEBUG:ReasoningAgent:{proposal_id}] Extracted token usage from LLM: {token_usage_data}"
)
- return "skip_vote"
- except Exception as e:
- self.logger.error(f"Error in should_vote: {str(e)}", exc_info=True)
- return "skip_vote"
+ # Fallback to estimation
+ if token_usage_data["total_tokens"] == 0:
+ # Get model name from LLM
+ llm_model_name = getattr(self.llm, "model_name", "gpt-4.1")
+ # First calculate token count from the text
+ token_count = len(formatted_prompt_text) // 4 # Simple estimation
+ # Create token usage dictionary for calculate_token_cost
+ token_usage_dict = {"input_tokens": token_count}
+ # Calculate cost
+ cost_result = calculate_token_cost(token_usage_dict, llm_model_name)
+ token_usage_data = {
+ "input_tokens": token_count,
+ "output_tokens": len(result.model_dump_json())
+ // 4, # rough estimate
+ "total_tokens": token_count + len(result.model_dump_json()) // 4,
+ }
+ self.logger.debug(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] Estimated token usage: {token_usage_data}"
+ )
- # Create voting node using VectorReact workflow
- async def vote_on_proposal(state: EvaluationState) -> EvaluationState:
- """Vote on the proposal using VectorReact workflow."""
- try:
- # Check if wallet_id is available
- if not state.get("wallet_id"):
- self.logger.warning(
- "No wallet_id provided for voting, skipping vote"
+ # Add token usage to state
+ if "token_usage" not in state:
+ state["token_usage"] = {}
+ state["token_usage"]["reasoning_agent"] = token_usage_data
+
+ result_dict = result.model_dump()
+ # Update state with the result
+ update_state_with_agent_result(state, result_dict, "reasoning")
+ return result_dict
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] Error in final evaluation: {str(e)}",
+ exc_info=True,
+ )
+ self.logger.info(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] ERROR_SCORE=50/100 | DECISION=Pending | REASON=Error: {str(e)}"
+ )
+ return {
+ "score": 50,
+ "decision": "Pending",
+ "explanation": f"Unable to make final decision due to error: {str(e)}",
+ }
+
+
+class ProposalEvaluationWorkflow(BaseWorkflow[ProposalEvaluationState]):
+ """Main workflow for evaluating DAO proposals using a hierarchical team."""
+
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
+ """Initialize the proposal evaluation workflow."""
+ super().__init__()
+ self.config = config or {}
+ self.hierarchical_workflow = HierarchicalTeamWorkflow(
+ name="ProposalEvaluation",
+ config={
+ "state_type": ProposalEvaluationState,
+ "recursion_limit": self.config.get("recursion_limit", 20),
+ },
+ )
+
+        # Instantiate and add the image processing node
+        image_processor_agent = ImageProcessingNode(config=self.config)
+ self.hierarchical_workflow.add_sub_workflow(
+ "image_processor", image_processor_agent
+ )
+
+ core_agent = CoreContextAgent(self.config)
+ historical_agent = HistoricalContextAgent(self.config)
+ financial_agent = FinancialContextAgent(self.config)
+ social_agent = SocialContextAgent(self.config)
+ reasoning_agent = ReasoningAgent(self.config)
+
+ self.hierarchical_workflow.add_sub_workflow("core_agent", core_agent)
+ self.hierarchical_workflow.add_sub_workflow(
+ "historical_agent", historical_agent
+ )
+ self.hierarchical_workflow.add_sub_workflow("financial_agent", financial_agent)
+ self.hierarchical_workflow.add_sub_workflow("social_agent", social_agent)
+ self.hierarchical_workflow.add_sub_workflow("reasoning_agent", reasoning_agent)
+
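+        # Image processing runs first so every downstream agent can attach the
+        # processed images to its LLM input.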
+ self.hierarchical_workflow.set_entry_point("image_processor")
+
+ def supervisor_logic(state: ProposalEvaluationState) -> Union[str, List[str]]:
+ """Determine the next step in the workflow."""
+ proposal_id = state.get("proposal_id", "unknown")
+
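+            # Routing order: image_processor (entry) -> core_agent -> historical_agent
+            # -> financial_agent/social_agent in parallel -> reasoning_agent -> END.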
+ # Debugging current state view for supervisor
+ logger.debug(
+ f"[DEBUG:Supervisor:{proposal_id}] Evaluating next step. State keys: {list(state.keys())}. "
+ f"proposal_images set: {'proposal_images' in state}, "
+ f"core_score set: {state.get('core_score') is not None}, "
+ f"historical_score set: {state.get('historical_score') is not None}, "
+ f"financial_score set: {state.get('financial_score') is not None}, "
+ f"social_score set: {state.get('social_score') is not None}, "
+ f"final_score set: {state.get('final_score') is not None}"
+ )
+
+ if state.get("halt", False):
+ logger.debug(
+ f"[DEBUG:Supervisor:{proposal_id}] Halt condition met, returning END"
+ )
+ return END
+
+            # After image_processor (the entry point), route to core_agent until core_score is set.
+            # The image_processor output (possibly an empty image list) is already in state.
+ if state.get("core_score") is None:
+ # This will be the first check after image_processor completes as it's the entry point.
+ current_core_invocations = state.get("core_agent_invocations", 0)
+ if current_core_invocations > 3:
+ logger.error(
+ f"[DEBUG:Supervisor:{proposal_id}] Core agent invoked too many times ({current_core_invocations}), halting."
)
- state["vote_result"] = {
- "success": False,
- "error": "No wallet_id provided for voting",
- }
- return state
+ return END
- self.logger.debug(
- f"Setting up VectorReact workflow: proposal_id={state['proposal_id']} | vote={state['approve']}"
+            # Do not increment core_agent_invocations manually here; the graph
+            # accumulates it via the field's Annotated reducer.
+
+ logger.debug(
+ f"[DEBUG:Supervisor:{proposal_id}] Routing to core_agent (core_score is None, invocation #{current_core_invocations})."
)
+ return "core_agent"
- # Set up the voting tool
- vote_tool = VoteOnActionProposalTool(wallet_id=state["wallet_id"])
- tools_map = {"dao_action_vote_on_proposal": vote_tool}
+ if state.get("historical_score") is None:
+ logger.debug(
+ f"[DEBUG:Supervisor:{proposal_id}] Routing to historical_agent."
+ )
+ return "historical_agent"
+
+ if (
+ state.get("financial_score") is None
+ or state.get("social_score") is None
+ ):
+ parallel_nodes = []
+ if state.get("financial_score") is None:
+ parallel_nodes.append("financial_agent")
+ if state.get("social_score") is None:
+ parallel_nodes.append("social_agent")
+ logger.debug(
+ f"[DEBUG:Supervisor:{proposal_id}] Initiating parallel execution of {parallel_nodes}"
+ )
+ return parallel_nodes
+
+ if state.get("final_score") is None:
+ logger.debug(
+ f"[DEBUG:Supervisor:{proposal_id}] All scores available but final score is None, routing to reasoning_agent"
+ )
+ return "reasoning_agent"
- # Create a user input message that instructs the LLM what to do
- vote_instruction = f"I need you to vote on a DAO proposal with ID {state['proposal_id']} in the contract {state['action_proposals_contract']}. Please vote {'FOR' if state['approve'] else 'AGAINST'} the proposal. Use the dao_action_vote_on_proposal tool to submit the vote."
+ logger.debug(
+ f"[DEBUG:Supervisor:{proposal_id}] All scores completed, returning END"
+ )
+ return END
- # Create VectorLangGraph service with collections
- service = ChatService(
- collection_names=self.collection_names,
+ self.hierarchical_workflow.set_supervisor_logic(supervisor_logic)
+
+ def halt_condition(state: ProposalEvaluationState) -> bool:
+ """Check if workflow should halt."""
+ proposal_id = state.get("proposal_id", "unknown")
+
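+            # Guard against runaway graphs: an explicit halt flag, excessive
+            # core-agent invocations, and an overall recursion cap all stop the run.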
+ if state.get("halt", False):
+ logger.debug(
+ f"[DEBUG:HaltCondition:{proposal_id}] Halting workflow due to explicit halt flag"
)
+ return True
- # History with system message only
- history = [
- {
- "role": "system",
- "content": "You are a helpful assistant tasked with voting on DAO proposals. Follow the instructions precisely.",
- }
- ]
-
- self.logger.debug("Executing VectorReact workflow for voting...")
-
- # Collect response chunks
- response_chunks = []
- vote_result = None
-
- # Execute the VectorReact workflow
- async for chunk in service.execute_stream(
- history=history,
- input_str=vote_instruction,
- tools_map=tools_map,
- ):
- response_chunks.append(chunk)
- self.logger.debug(f"VectorReact chunk: {chunk}")
-
- # Extract tool results
- if (
- chunk.get("type") == "tool"
- and chunk.get("tool") == "dao_action_vote_on_proposal"
- ):
- if "output" in chunk:
- vote_result = chunk.get("output")
- self.logger.debug(f"Vote result: {vote_result}")
-
- # Update state with vote result and vector results
- state["vote_result"] = {
- "success": vote_result is not None,
- "output": vote_result,
- }
- state["vector_results"] = [
- chunk.get("vector_results", [])
- for chunk in response_chunks
- if chunk.get("vector_results")
- ]
+ # Check for excessive core agent invocations
+ if state.get("core_agent_invocations", 0) > 3:
+ logger.debug(
+ f"[DEBUG:HaltCondition:{proposal_id}] Halting workflow due to excessive core agent invocations: {state.get('core_agent_invocations', 0)}"
+ )
+ return True
- return state
- except Exception as e:
- self.logger.error(f"Error in vote_on_proposal: {str(e)}", exc_info=True)
- state["vote_result"] = {
- "success": False,
- "error": f"Error during voting: {str(e)}",
- }
- return state
+ recursion_count = state.get("recursion_count", 0)
+ if recursion_count > 8:
+ logger.debug(
+ f"[DEBUG:HaltCondition:{proposal_id}] Halting workflow - possible loop detected after {recursion_count} iterations"
+ )
+ return True
+
+ if (
+ state.get("core_score") is not None
+ and state.get("historical_score") is not None
+ and state.get("financial_score") is not None
+ and state.get("social_score") is not None
+ and state.get("final_score") is None
+ and recursion_count > 3
+ ):
+ logger.debug(
+ f"[DEBUG:HaltCondition:{proposal_id}] Halting workflow - reasoning agent appears to be failing after {recursion_count} attempts"
+ )
+ return True
- # Create skip voting node
- async def skip_voting(state: EvaluationState) -> EvaluationState:
- """Skip voting and just return the evaluation."""
- try:
- self.logger.debug("Vote skipped: reason=threshold_or_setting")
- state["vote_result"] = {
- "success": True,
- "message": "Voting skipped due to confidence threshold or auto_vote setting",
- "data": None,
- }
- return state
- except Exception as e:
- self.logger.error(f"Error in skip_voting: {str(e)}", exc_info=True)
- state["vote_result"] = {
- "success": True,
- "message": f"Voting skipped (with error: {str(e)})",
- "data": None,
- }
- return state
-
- # Create the graph
- workflow = StateGraph(EvaluationState)
-
- # Add nodes
- workflow.add_node("fetch_context", fetch_context)
- workflow.add_node("format_prompt", format_evaluation_prompt)
- workflow.add_node("evaluate", call_evaluation_llm)
- workflow.add_node("vote", vote_on_proposal)
- workflow.add_node("skip_vote", skip_voting)
-
- # Set up the conditional branching
- workflow.set_entry_point("fetch_context") # Start with fetching context
- workflow.add_edge("fetch_context", "format_prompt")
- workflow.add_edge("format_prompt", "evaluate")
- workflow.add_conditional_edges(
- "evaluate",
- should_vote,
- {
- "vote": "vote",
- "skip_vote": "skip_vote",
- },
+ state["recursion_count"] = recursion_count + 1
+ logger.debug(
+ f"[DEBUG:HaltCondition:{proposal_id}] Incrementing recursion counter to {state['recursion_count']}"
+ )
+
+ return False
+
+ self.hierarchical_workflow.set_halt_condition(halt_condition)
+ self.required_fields = ["proposal_id", "proposal_data"]
+
+ def _create_prompt(self) -> PromptTemplate:
+ """Create the main workflow prompt."""
+ return PromptTemplate(
+ input_variables=["proposal_data"],
+ template="Evaluate the DAO proposal: {proposal_data}",
)
- workflow.add_edge("vote", END)
- workflow.add_edge("skip_vote", END)
- return workflow.compile()
+ def _create_graph(self) -> StateGraph:
+ """Create the workflow graph."""
+ return self.hierarchical_workflow.build_graph()
- def _validate_state(self, state: EvaluationState) -> bool:
+ def _validate_state(self, state: ProposalEvaluationState) -> bool:
"""Validate the workflow state."""
- # Only validate minimal required fields for initial state
- # Other fields like proposal_data are fetched within the workflow
- required_fields = ["proposal_id"]
+ if not super()._validate_state(state):
+ return False
- # Log the state for debugging
- self.logger.debug(
- f"Validating initial state: proposal_id={state.get('proposal_id')}"
- )
+ if "flags" not in state:
+ state["flags"] = []
+ elif state["flags"] is None:
+ state["flags"] = []
+
+ if "summaries" not in state:
+ state["summaries"] = {}
+ elif state["summaries"] is None:
+ state["summaries"] = {}
+
+ if "halt" not in state:
+ state["halt"] = False
+
+ if "token_usage" not in state:
+ state["token_usage"] = {}
+ elif state["token_usage"] is None:
+ state["token_usage"] = {}
- # Check all fields and log problems
- for field in required_fields:
- if field not in state:
- self.logger.error(f"Missing required field: {field}")
- return False
- elif not state[field]:
- self.logger.error(f"Empty required field: {field}")
- return False
-
- # Note: Detailed validation of proposal_data happens in fetch_context node
- self.logger.debug("Initial state validation successful")
return True
+
+
+async def evaluate_proposal(
+ proposal_id: str,
+ proposal_data: str,
+ config: Optional[Dict[str, Any]] = None,
+) -> Dict[str, Any]:
+ """Evaluate a proposal using the hierarchical team workflow."""
+ logger.info(f"[DEBUG:Workflow:{proposal_id}] Starting evaluation workflow")
+
+ debug_level = 0
+ if config and "debug_level" in config:
+ debug_level = config.get("debug_level", 0)
+ logger.debug(f"[PROPOSAL_DEBUG] Using debug_level: {debug_level}")
+
+ logger.debug(
+ f"[PROPOSAL_DEBUG] evaluate_proposal received proposal_id: {proposal_id}"
+ )
+ logger.debug(
+ f"[PROPOSAL_DEBUG] evaluate_proposal received proposal_data type: {type(proposal_data)}"
+ )
+
+ if not proposal_data:
+ logger.warning(
+ f"[PROPOSAL_DEBUG] proposal_data is empty or None! This will cause evaluation failure."
+ )
+
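+    # Seed the initial workflow state; per-agent scores start as None so the
+    # supervisor can tell which agents still need to run.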
+ state = {
+ "proposal_id": proposal_id,
+ "proposal_data": proposal_data,
+ "flags": [],
+ "summaries": {},
+ "halt": False,
+ "token_usage": {},
+ "core_score": None,
+ "historical_score": None,
+ "financial_score": None,
+ "social_score": None,
+ "final_score": None,
+ "decision": None,
+ "core_agent_invocations": 0,
+ "recursion_count": 0,
+ }
+
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] Initialized workflow state with keys: {state.keys()}"
+ )
+ logger.debug(
+ f"[PROPOSAL_DEBUG] Proposal data in state: {state.get('proposal_data')}"
+ )
+
+ try:
+ workflow = ProposalEvaluationWorkflow(config or {})
+ logger.info(
+ f"[DEBUG:Workflow:{proposal_id}] Executing hierarchical team workflow"
+ )
+ result = await workflow.execute(state)
+ logger.info(
+ f"[DEBUG:Workflow:{proposal_id}] Workflow execution completed with decision: {result.get('decision', 'Unknown')}"
+ )
+
+ logger.debug(f"[DEBUG:Workflow:{proposal_id}] RESULT SCORES TYPES:")
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] - Core: {type(result.get('core_score'))} = {repr(result.get('core_score'))}"
+ )
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] - Historical: {type(result.get('historical_score'))} = {repr(result.get('historical_score'))}"
+ )
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] - Financial: {type(result.get('financial_score'))} = {repr(result.get('financial_score'))}"
+ )
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] - Social: {type(result.get('social_score'))} = {repr(result.get('social_score'))}"
+ )
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] - Final: {type(result.get('final_score'))} = {repr(result.get('final_score'))}"
+ )
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] - Decision: {type(result.get('decision'))} = {repr(result.get('decision'))}"
+ )
+
+ if result is None:
+ logger.error(
+ f"[DEBUG:Workflow:{proposal_id}] Workflow returned None result, using default values"
+ )
+ return {
+ "proposal_id": proposal_id,
+ "score": 0,
+ "decision": "Error",
+ "explanation": "Evaluation failed: Workflow returned empty result",
+ "component_scores": {
+ "core": 0,
+ "historical": 0,
+ "financial": 0,
+ "social": 0,
+ },
+ "flags": ["Workflow error: Empty result"],
+ "token_usage": {},
+ }
+
+ def safe_extract_score(value, default=0):
+ if isinstance(value, dict) and "score" in value:
+ return value.get("score", default)
+ elif isinstance(value, int):
+ return value
+ elif isinstance(value, str):
+ try:
+ return int(value)
+ except ValueError:
+ pass # If string is not int, will fall through to default
+ return default
+
+ final_score_val = result.get("final_score")
+ logger.debug(
+ f"[DEBUG:evaluate_proposal] Raw final_score_val from result state: {repr(final_score_val)} (type: {type(final_score_val)})"
+ )
+
+ final_score_dict = {}
+ if isinstance(final_score_val, dict):
+ final_score_dict = final_score_val
+
+ component_scores = {
+ "core": safe_extract_score(result.get("core_score")),
+ "historical": safe_extract_score(result.get("historical_score")),
+ "financial": safe_extract_score(result.get("financial_score")),
+ "social": safe_extract_score(result.get("social_score")),
+ }
+
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] EXTRACTED COMPONENT SCORES: {component_scores}"
+ )
+
+ explanation = ""
+ if isinstance(final_score_dict, dict) and "explanation" in final_score_dict:
+ explanation = final_score_dict.get("explanation", "")
+ elif isinstance(final_score_val, str):
+ explanation = final_score_val
+
+ # Log the explanation to help debug
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] Explanation extracted: {explanation[:100]}..."
+ )
+
+ final_score = 0
+ if isinstance(final_score_dict, dict) and "score" in final_score_dict:
+ final_score = final_score_dict.get("score", 0)
+ else:
+ final_score = safe_extract_score(final_score_val)
+
+ decision = result.get("decision")
+ if decision is None:
+ if isinstance(final_score_dict, dict) and "decision" in final_score_dict:
+ decision = final_score_dict.get("decision")
+ else:
+ decision = "Reject"
+
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] Final decision: {decision}, score: {final_score}"
+ )
+
+ total_token_usage = result.get("token_usage", {})
+ total_input_tokens = 0
+ total_output_tokens = 0
+ total_tokens = 0
+
+ # Aggregate tokens from all agent steps
+        # This assumes a single model across all steps; if agents used different
+        # models, per-model tracking would be needed here.
+ for agent_key, usage_data in total_token_usage.items():
+ if isinstance(usage_data, dict):
+ total_input_tokens += usage_data.get("input_tokens", 0)
+ total_output_tokens += usage_data.get("output_tokens", 0)
+ total_tokens += usage_data.get("total_tokens", 0)
+ else:
+ logger.warning(
+ f"Unexpected format for token_usage data for agent {agent_key}: {usage_data}"
+ )
+
+ # Extract component summaries for detailed reporting
+ component_summaries = {}
+ if isinstance(result.get("summaries"), dict):
+ component_summaries = result.get("summaries")
+
+ # Extract and aggregate flags
+ all_flags = result.get("flags", [])
+ if not isinstance(all_flags, list):
+ all_flags = []
+
+        # Placeholder values for web-search-specific token usage, which is not
+        # currently tracked separately from the per-agent usage above.
+ web_search_input_tokens = 0
+ web_search_output_tokens = 0
+ web_search_total_tokens = 0
+
+ # Initialize total token usage by model
+ total_token_usage_by_model = {}
+
+ # Extract token usage by model from token_usage data
+ for agent_name, agent_usage in total_token_usage.items():
+ if isinstance(agent_usage, dict) and agent_usage.get("total_tokens", 0) > 0:
+                # Per-agent model names are not recorded, so attribute usage to the default model
+                model_name = "gpt-4.1"
+
+ # Initialize the model entry if needed
+ if model_name not in total_token_usage_by_model:
+ total_token_usage_by_model[model_name] = {
+ "input_tokens": 0,
+ "output_tokens": 0,
+ "total_tokens": 0,
+ }
+
+ # Add token usage for this agent to the model's tally
+ total_token_usage_by_model[model_name][
+ "input_tokens"
+ ] += agent_usage.get("input_tokens", 0)
+ total_token_usage_by_model[model_name][
+ "output_tokens"
+ ] += agent_usage.get("output_tokens", 0)
+ total_token_usage_by_model[model_name][
+ "total_tokens"
+ ] += agent_usage.get("total_tokens", 0)
+
+ # Fallback if no token usage was recorded
+ if not total_token_usage_by_model:
+ total_token_usage_by_model["gpt-4.1"] = {
+ "input_tokens": total_input_tokens,
+ "output_tokens": total_output_tokens,
+ "total_tokens": total_tokens,
+ }
+
+    # Approximate cost calculation by model, using hardcoded per-1K-token rates
+ cost_per_thousand = {
+ "gpt-4.1": 0.01, # $0.01 per 1K tokens
+ "gpt-4.1-mini": 0.005, # $0.005 per 1K tokens
+ "gpt-4.1-32k": 0.03, # $0.03 per 1K tokens
+ "gpt-4": 0.03, # $0.03 per 1K tokens
+ "gpt-4-32k": 0.06, # $0.06 per 1K tokens
+ "gpt-3.5-turbo": 0.0015, # $0.0015 per 1K tokens
+ "default": 0.01, # default fallback
+ }
+
+ # Calculate costs for each model
+ total_cost_by_model = {}
+ total_overall_cost = 0.0
+ for model_name, usage in total_token_usage_by_model.items():
+ # Get cost per 1K tokens for this model
+ model_cost_per_k = cost_per_thousand.get(
+ model_name, cost_per_thousand["default"]
+ )
+ # Calculate cost for this model's usage
+ model_cost = usage["total_tokens"] * (model_cost_per_k / 1000)
+ total_cost_by_model[model_name] = model_cost
+ total_overall_cost += model_cost
+
+ if not total_cost_by_model:
+ # Fallback if no models were recorded
+ model_name = "gpt-4.1" # Default model name
+ total_cost_by_model[model_name] = total_tokens * (
+ cost_per_thousand["default"] / 1000
+ )
+ total_overall_cost = total_cost_by_model[model_name]
+
+ final_result = {
+ "success": True,
+ "evaluation": {
+ "approve": decision == "Approve",
+ "confidence_score": final_score / 100.0 if final_score else 0.0,
+ "reasoning": explanation,
+ },
+ "decision": decision,
+ "score": final_score,
+ "explanation": explanation,
+ "component_scores": component_scores,
+ "component_summaries": component_summaries, # Include component summaries
+ "flags": all_flags,
+ "token_usage": total_token_usage,
+ "web_search_results": [],
+ "treasury_balance": None,
+ "web_search_token_usage": {
+ "input_tokens": web_search_input_tokens,
+ "output_tokens": web_search_output_tokens,
+ "total_tokens": web_search_total_tokens,
+ },
+ "evaluation_token_usage": {
+ "input_tokens": total_input_tokens,
+ "output_tokens": total_output_tokens,
+ "total_tokens": total_tokens,
+ },
+ "evaluation_model_info": {"name": "gpt-4.1", "temperature": 0.1},
+ "web_search_model_info": {"name": "gpt-4.1", "temperature": 0.1},
+ "total_token_usage_by_model": total_token_usage_by_model,
+ "total_cost_by_model": total_cost_by_model,
+ "total_overall_cost": total_overall_cost,
+ "summaries": component_summaries,
+ }
+
+ logger.debug(
+        f"Proposal evaluation completed: Success={final_result['success']} | Decision={'APPROVE' if decision == 'Approve' else 'REJECT'} | Confidence={final_result['evaluation']['confidence_score']:.2f}"
+ )
+ return final_result
+ except Exception as e:
+ logger.error(f"Error in workflow execution: {str(e)}", exc_info=True)
+ return {
+ "proposal_id": proposal_id,
+ "score": 0,
+ "decision": "Error",
+ "explanation": f"Evaluation failed: {str(e)}",
+ "component_scores": {
+ "core": 0,
+ "historical": 0,
+ "financial": 0,
+ "social": 0,
+ },
+ "flags": [f"Workflow error: {str(e)}"],
+ "token_usage": {},
+ }
+
+
def get_proposal_evaluation_tools(
profile: Optional[Profile] = None, agent_id: Optional[UUID] = None
):
- """Get the tools needed for proposal evaluation.
-
- Args:
- profile: Optional user profile
- agent_id: Optional agent ID
-
- Returns:
- Dictionary of filtered tools for proposal evaluation
- """
- # Initialize all tools
+ """Get the tools needed for proposal evaluation."""
all_tools = initialize_tools(profile=profile, agent_id=agent_id)
logger.debug(f"Available tools: {', '.join(all_tools.keys())}")
-
- # Filter to only include the tools we need
required_tools = [
"dao_action_get_proposal",
"dao_action_vote_on_proposal",
"dao_action_get_voting_power",
"dao_action_get_voting_configuration",
- "database_get_dao_get_by_name", # Try old name
- "dao_search", # Try new name
+ "database_get_dao_get_by_name",
+ "dao_search",
]
-
filtered_tools = filter_tools_by_names(required_tools, all_tools)
logger.debug(f"Using tools: {', '.join(filtered_tools.keys())}")
-
return filtered_tools
@@ -935,26 +1717,13 @@ async def evaluate_and_vote_on_proposal(
auto_vote: bool = True,
confidence_threshold: float = 0.7,
dao_id: Optional[UUID] = None,
+ debug_level: int = 0, # 0=normal, 1=verbose, 2=very verbose
) -> Dict:
- """Evaluate a proposal and automatically vote based on the evaluation.
-
- Args:
- proposal_id: The ID of the proposal to evaluate and vote on
- wallet_id: Optional wallet ID to use for voting
- agent_id: Optional agent ID to use for retrieving prompts
- auto_vote: Whether to automatically vote based on the evaluation
- confidence_threshold: Minimum confidence score required to auto-vote (0.0-1.0)
- dao_id: Optional DAO ID to explicitly pass to the workflow
-
- Returns:
- Dictionary containing the evaluation results and voting outcome
- """
+ """Evaluate a proposal and automatically vote based on the evaluation."""
logger.debug(
- f"Starting proposal evaluation: proposal_id={proposal_id} | auto_vote={auto_vote} | confidence_threshold={confidence_threshold}"
+ f"Starting proposal evaluation: proposal_id={proposal_id} | auto_vote={auto_vote} | confidence_threshold={confidence_threshold} | debug_level={debug_level}"
)
-
try:
- # Determine effective agent ID
effective_agent_id = agent_id
if not effective_agent_id and wallet_id:
wallet = backend.get_wallet(wallet_id)
@@ -964,17 +1733,14 @@ async def evaluate_and_vote_on_proposal(
f"Using agent ID {effective_agent_id} from wallet {wallet_id}"
)
- # Fetch the primary prompt to determine model and temperature settings
- # Note: Actual prompt text fetching happens inside the workflow now.
- model_name = "gpt-4.1" # Default model
- temperature = 0.1 # Default temperature
+ model_name = "gpt-4.1"
+ temperature = 0.1
if effective_agent_id:
try:
- # We only need one active prompt to get settings
prompts = backend.list_prompts(
PromptFilter(
agent_id=effective_agent_id,
- dao_id=dao_id, # Assuming dao_id is available, might need refinement
+ dao_id=dao_id,
is_active=True,
limit=1,
)
@@ -992,171 +1758,252 @@ async def evaluate_and_vote_on_proposal(
)
else:
logger.warning(
- f"No active prompts found for agent {effective_agent_id} to determine settings."
+ f"No active prompts found for agent {effective_agent_id}."
)
except Exception as e:
logger.error(
f"Failed to get agent prompt settings: {str(e)}", exc_info=True
)
- # Initialize state (minimal initial data)
- state = {
- "proposal_id": proposal_id,
- "dao_id": dao_id, # Pass DAO ID to the workflow
- "agent_id": effective_agent_id, # Pass Agent ID for prompt loading
- "wallet_id": wallet_id, # Pass wallet ID for voting tool
- "approve": False,
- "confidence_score": 0.0,
- "reasoning": "",
- "vote_result": None,
- "confidence_threshold": confidence_threshold,
- "auto_vote": auto_vote,
- "vector_results": None,
- "recent_tweets": None,
- "web_search_results": None,
- "token_usage": None,
- "model_info": None,
- "web_search_token_usage": None,
- "evaluation_token_usage": None,
- "evaluation_model_info": None,
- "web_search_model_info": None,
+ logger.debug(
+ f"[PROPOSAL_DEBUG] Fetching proposal data from backend for ID: {proposal_id}"
+ )
+ proposal_data = backend.get_proposal(proposal_id)
+ if not proposal_data:
+ logger.error(
+ f"[PROPOSAL_DEBUG] No proposal data found for ID: {proposal_id}"
+ )
+ raise ValueError(f"Proposal {proposal_id} not found")
+
+ logger.debug(f"[PROPOSAL_DEBUG] Raw proposal data: {proposal_data}")
+
+ proposal_content = proposal_data.parameters or ""
+ if not proposal_content:
+        logger.warning("[PROPOSAL_DEBUG] Proposal parameters/content is empty!")
+
+ config = {
+ "model_name": model_name,
+ "temperature": temperature,
+ "mission_collection": "knowledge_collection",
+ "proposals_collection": "proposals",
+ "enable_web_search": True,
+ "planning_model": "gpt-4.1-mini",
}
- # Create and run workflow with model settings from prompt
- workflow = ProposalEvaluationWorkflow(
- model_name=model_name, temperature=temperature
+ if debug_level > 0:
+ config["debug_level"] = debug_level
+ logger.debug(f"[PROPOSAL_DEBUG] Setting debug_level to {debug_level}")
+
+ if not dao_id and proposal_data.dao_id:
+ dao_id = proposal_data.dao_id
+ dao_info = None
+ if dao_id:
+ dao_info = backend.get_dao(dao_id)
+ if dao_info:
+ config["dao_mission"] = dao_info.mission
+
+ treasury_balance = None
+ try:
+ if dao_id:
+ treasury_extensions = backend.list_extensions(
+ ExtensionFilter(dao_id=dao_id, type="EXTENSIONS_TREASURY")
+ )
+ if treasury_extensions:
+ hiro_api = HiroApi()
+ treasury_balance = hiro_api.get_address_balance(
+ treasury_extensions[0].contract_principal
+ )
+ except Exception as e:
+ logger.error(f"Failed to get treasury balance: {str(e)}", exc_info=True)
+
+ logger.debug("Starting hierarchical evaluation workflow...")
+ eval_result = await evaluate_proposal(
+ proposal_id=str(proposal_id),
+ proposal_data=proposal_data.parameters,
+ config=config,
)
- if not workflow._validate_state(state):
- error_msg = "Invalid workflow state"
- logger.error(error_msg)
- return {
- "success": False,
- "error": error_msg,
- }
- logger.debug("Starting workflow execution...")
- result = await workflow.execute(state)
- logger.debug("Workflow execution completed")
+ decision = eval_result.get("decision")
+ if decision is None:
+ decision = "Reject"
+ logger.warning(
+ f"No decision found in evaluation results, defaulting to '{decision}'"
+ )
- # Extract transaction ID from vote result if available
- tx_id = None
- if result.get("vote_result") and result["vote_result"].get("output"):
- # Try to extract tx_id from the output
- output = result["vote_result"]["output"]
- if isinstance(output, str) and "txid:" in output.lower():
- # Extract the transaction ID from the output
- for line in output.split("\n"):
- if "txid:" in line.lower():
- parts = line.split(":")
- if len(parts) > 1:
- tx_id = parts[1].strip()
- logger.debug(f"Transaction ID extracted: {tx_id}")
- break
-
- # Prepare final result
- final_result = {
- "success": True,
- "evaluation": {
- "approve": result.get("approve", False),
- "confidence_score": result.get("confidence_score", 0.0),
- "reasoning": result.get(
- "reasoning", "Evaluation failed or not available"
- ),
- },
- "vote_result": result.get("vote_result"),
- "auto_voted": auto_vote
- and result.get("confidence_score", 0.0) >= confidence_threshold,
- "tx_id": tx_id,
- "formatted_prompt": result.get(
- "formatted_prompt", "Formatted prompt not available"
- ),
- "vector_results": result.get("vector_results"),
- "recent_tweets": result.get("recent_tweets"),
- "web_search_results": result.get("web_search_results"),
- "treasury_balance": result.get("treasury_balance"),
- "web_search_token_usage": result.get("web_search_token_usage"),
- "evaluation_token_usage": result.get("evaluation_token_usage"),
- "evaluation_model_info": result.get("evaluation_model_info"),
- "web_search_model_info": result.get("web_search_model_info"),
- }
+ score = eval_result.get("score", 0)
+ confidence_score = score / 100.0 if score else 0.0
- # --- Aggregate Token Usage and Calculate Costs --- #
- total_token_usage_by_model = {}
- total_cost_by_model = {}
- total_overall_cost = 0.0
-
- steps = [
- (
- "web_search",
- result.get("web_search_token_usage"),
- result.get("web_search_model_info"),
- ),
- (
- "evaluation",
- result.get("evaluation_token_usage"),
- result.get("evaluation_model_info"),
- ),
- ]
+ approve = False
+ if isinstance(decision, str) and decision.lower() == "approve":
+ approve = True
- for step_name, usage, model_info in steps:
- if usage and model_info and model_info.get("name") != "unknown":
- model_name = model_info["name"]
-
- # Aggregate usage per model
- if model_name not in total_token_usage_by_model:
- total_token_usage_by_model[model_name] = {
- "input_tokens": 0,
- "output_tokens": 0,
- "total_tokens": 0,
- }
- total_token_usage_by_model[model_name]["input_tokens"] += usage.get(
- "input_tokens", 0
- )
- total_token_usage_by_model[model_name]["output_tokens"] += usage.get(
- "output_tokens", 0
- )
- total_token_usage_by_model[model_name]["total_tokens"] += usage.get(
- "total_tokens", 0
- )
+ should_vote = auto_vote and confidence_score >= confidence_threshold
- # Calculate cost for this step/model
- step_cost = calculate_token_cost(usage, model_name)
+ vote_result = None
+ tx_id = None
+ if should_vote and wallet_id:
+ try:
+ vote_tool = VoteOnActionProposalTool(wallet_id=wallet_id)
+ if proposal_data.type == ProposalType.ACTION:
+ contract_info = proposal_data.contract_principal
+ if "." in contract_info:
+ parts = contract_info.split(".")
+ if len(parts) >= 2:
+ action_proposals_contract = parts[0]
+ action_proposals_voting_extension = parts[1]
+ result = await vote_tool.vote_on_proposal(
+ contract_principal=action_proposals_contract,
+ extension_name=action_proposals_voting_extension,
+ proposal_id=proposal_data.proposal_id,
+ vote=approve,
+ )
+ vote_result = {
+ "success": result is not None,
+ "output": result,
+ }
+ if (
+ result
+ and isinstance(result, str)
+ and "txid:" in result.lower()
+ ):
+ for line in result.split("\n"):
+ if "txid:" in line.lower():
+ parts = line.split(":")
+ if len(parts) > 1:
+ tx_id = parts[1].strip()
+ break
+ else:
+ logger.warning(
+ f"Invalid contract principal format: {contract_info}"
+ )
+ else:
+ logger.warning(
+ f"Cannot vote on non-action proposal type: {proposal_data.type}"
+ )
+ except Exception as e:
+ logger.error(f"Error executing vote: {str(e)}", exc_info=True)
+ vote_result = {
+ "success": False,
+ "error": f"Error during voting: {str(e)}",
+ }
+ elif not should_vote:
+ vote_result = {
+ "success": True,
+ "message": "Voting skipped due to confidence threshold or auto_vote setting",
+ "data": None,
+ }
- # Aggregate cost per model
- if model_name not in total_cost_by_model:
- total_cost_by_model[model_name] = 0.0
- total_cost_by_model[model_name] += step_cost["total_cost"]
- total_overall_cost += step_cost["total_cost"]
+ total_token_usage = eval_result.get("token_usage", {})
+ total_input_tokens = 0
+ total_output_tokens = 0
+ total_tokens = 0
+
+ # Aggregate tokens from all agent steps
+ # Assuming model_name is consistent across all steps for this aggregation, or we use the primary model_name
+ # If each agent could use a different model, this would need more detailed per-model tracking
+ for agent_key, usage_data in total_token_usage.items():
+ if isinstance(usage_data, dict):
+ total_input_tokens += usage_data.get("input_tokens", 0)
+ total_output_tokens += usage_data.get("output_tokens", 0)
+ total_tokens += usage_data.get("total_tokens", 0)
else:
logger.warning(
- f"Skipping cost calculation for step '{step_name}' due to missing usage or model info."
+ f"Unexpected format for token_usage data for agent {agent_key}: {usage_data}"
)
- final_result["total_token_usage_by_model"] = total_token_usage_by_model
- final_result["total_cost_by_model"] = total_cost_by_model
- final_result["total_overall_cost"] = total_overall_cost
- # --- End Aggregation --- #
+        # Web search token usage is not tracked separately; report zeroed placeholders.
+ web_search_input_tokens = 0
+ web_search_output_tokens = 0
+ web_search_total_tokens = 0
+
+ # Initialize total_token_usage_by_model
+ total_token_usage_by_model = {}
+
+ # Use the default model name from settings or default to gpt-4.1
+ default_model = model_name or "gpt-4.1"
+
+ # Add total token counts to the model
+ total_token_usage_by_model[default_model] = {
+ "input_tokens": total_input_tokens,
+ "output_tokens": total_output_tokens,
+ "total_tokens": total_tokens,
+ }
+
+ # Improved cost calculation by model
+ cost_per_thousand = {
+ "gpt-4.1": 0.01, # $0.01 per 1K tokens
+ "gpt-4.1-mini": 0.005, # $0.005 per 1K tokens
+ "gpt-4.1-32k": 0.03, # $0.03 per 1K tokens
+ "gpt-4": 0.03, # $0.03 per 1K tokens
+ "gpt-4-32k": 0.06, # $0.06 per 1K tokens
+ "gpt-3.5-turbo": 0.0015, # $0.0015 per 1K tokens
+ "default": 0.01, # default fallback
+ }
+
+ # Calculate costs for each model
+ total_cost_by_model = {}
+ total_overall_cost = 0.0
+ for model_key, usage in total_token_usage_by_model.items():
+ # Get cost per 1K tokens for this model
+ model_cost_per_k = cost_per_thousand.get(
+ model_key, cost_per_thousand["default"]
+ )
+ # Calculate cost for this model's usage
+ model_cost = usage["total_tokens"] * (model_cost_per_k / 1000)
+ total_cost_by_model[model_key] = model_cost
+ total_overall_cost += model_cost
+
+ if not total_cost_by_model:
+ # Fallback if no models were recorded
+ default_model_key = "gpt-4.1" # Default model name
+ total_cost_by_model[default_model_key] = total_tokens * (
+ cost_per_thousand["default"] / 1000
+ )
+ total_overall_cost = total_cost_by_model[default_model_key]
+
+ final_result = {
+ "success": True,
+ "evaluation": {
+ "approve": approve,
+ "confidence_score": confidence_score,
+ "reasoning": eval_result.get("explanation", ""),
+ },
+ "vote_result": vote_result,
+ "auto_voted": should_vote,
+ "tx_id": tx_id,
+ "vector_results": [],
+ "recent_tweets": [],
+ "web_search_results": [],
+ "treasury_balance": treasury_balance,
+ "component_scores": eval_result.get("component_scores", {}),
+ "component_summaries": eval_result.get("summaries", {}),
+ "flags": eval_result.get("flags", []),
+ "web_search_token_usage": {
+ "input_tokens": web_search_input_tokens,
+ "output_tokens": web_search_output_tokens,
+ "total_tokens": web_search_total_tokens,
+ },
+ "evaluation_token_usage": {
+ "input_tokens": total_input_tokens,
+ "output_tokens": total_output_tokens,
+ "total_tokens": total_tokens,
+ },
+ "evaluation_model_info": {"name": model_name, "temperature": temperature},
+ "web_search_model_info": {"name": model_name, "temperature": temperature},
+ "total_token_usage_by_model": total_token_usage_by_model,
+ "total_cost_by_model": total_cost_by_model,
+ "total_overall_cost": total_overall_cost,
+ }
- # Updated Logging
logger.debug(
- f"Proposal evaluation completed: Success={final_result['success']} | "
- f"Decision={'APPROVE' if final_result['evaluation']['approve'] else 'REJECT'} | "
- f"Confidence={final_result['evaluation']['confidence_score']:.2f} | "
- f"Auto-voted={final_result['auto_voted']} | Transaction={tx_id or 'None'} | "
- f"Total Cost (USD)=${total_overall_cost:.4f}"
+ f"Proposal evaluation completed: Success={final_result['success']} | Decision={'APPROVE' if approve else 'REJECT'} | Confidence={confidence_score:.2f} | Auto-voted={should_vote} | Transaction={tx_id or 'None'}"
)
- logger.debug(f"Cost Breakdown: {total_cost_by_model}")
- logger.debug(f"Token Usage Breakdown: {total_token_usage_by_model}")
- logger.debug(f"Full evaluation result: {final_result}")
-
return final_result
except Exception as e:
error_msg = f"Unexpected error in evaluate_and_vote_on_proposal: {str(e)}"
logger.error(error_msg, exc_info=True)
- return {
- "success": False,
- "error": error_msg,
- }
+ return {"success": False, "error": error_msg}
async def evaluate_proposal_only(
@@ -1165,20 +2012,8 @@ async def evaluate_proposal_only(
agent_id: Optional[UUID] = None,
dao_id: Optional[UUID] = None,
) -> Dict:
- """Evaluate a proposal without voting.
-
- Args:
- proposal_id: The ID of the proposal to evaluate
- wallet_id: Optional wallet ID to use for retrieving proposal data
- agent_id: Optional agent ID associated with the evaluation
- dao_id: Optional DAO ID associated with the proposal
-
- Returns:
- Dictionary containing the evaluation results
- """
+ """Evaluate a proposal without voting."""
logger.debug(f"Starting proposal-only evaluation: proposal_id={proposal_id}")
-
- # Determine effective agent ID (same logic as evaluate_and_vote)
effective_agent_id = agent_id
if not effective_agent_id and wallet_id:
wallet = backend.get_wallet(wallet_id)
@@ -1193,7 +2028,6 @@ async def evaluate_proposal_only(
auto_vote=False,
)
- # Remove vote-related fields from the response
logger.debug("Removing vote-related fields from response")
if "vote_result" in result:
del result["vote_result"]
From 1e64010028babc616917a90a45d03ae0244464aa Mon Sep 17 00:00:00 2001
From: human058382928 <162091348+human058382928@users.noreply.github.com>
Date: Fri, 9 May 2025 16:58:19 -0700
Subject: [PATCH 2/5] update
---
examples/proposal_evaluation_example.py | 2 +-
services/workflows/proposal_evaluation.py | 254 +++++++++++++++++-----
2 files changed, 197 insertions(+), 59 deletions(-)
diff --git a/examples/proposal_evaluation_example.py b/examples/proposal_evaluation_example.py
index 324bd2c9..45615822 100644
--- a/examples/proposal_evaluation_example.py
+++ b/examples/proposal_evaluation_example.py
@@ -35,7 +35,7 @@ async def create_test_proposal(dao_id: UUID) -> UUID:
The ID of the created proposal
"""
# Create test parameters as a JSON object
- parameters = "let this rip https://media1.giphy.com/media/v1.Y2lkPTc5MGI3NjExN3VoZzJzdmV3eGs4M2VrOXBkamg2dTVhb2NhcndwNzVxNHplMzhoaiZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/M7HkIkPrNhSy4/giphy.gif https://mkkhfmcrbwyuutcvtier.supabase.co/storage/v1/object/public/x-vote-media//img_2.jpeg"
+ parameters = "I Publius.btc will do a $FACES airdrop to as many bitcoin faces holders as possible. I will report back with a confirmation message and proof. Give me a shot."
# # Convert parameters to JSON string and then hex encode it
# parameters_hex = "0x" + binascii.hexlify(parameters.encode("utf-8")).decode("utf-8")
diff --git a/services/workflows/proposal_evaluation.py b/services/workflows/proposal_evaluation.py
index 9ebfd6bb..a70dbb48 100644
--- a/services/workflows/proposal_evaluation.py
+++ b/services/workflows/proposal_evaluation.py
@@ -81,46 +81,69 @@ def no_update_reducer(current: Any, new: List[Any]) -> Any:
return current
-def merge_dict_override_fn(key, values):
- """Merge dictionaries by taking the last non-None value."""
- # Handle case where values is None
- if values is None:
+def merge_dicts(current: Optional[Dict], updates: List[Optional[Dict]]) -> Dict:
+ """Merge multiple dictionary updates into the current dictionary."""
+ # Initialize current if it's None
+ if current is None:
+ current = {}
+
+ # Handle case where updates is None
+ if updates is None:
+ return current
+
+ # Process updates if it's a list
+ if isinstance(updates, list):
+ for update in updates:
+ if update and isinstance(update, dict):
+ current.update(update)
+ # Handle case where updates is a single dictionary, not a list
+ elif isinstance(updates, dict):
+ current.update(updates)
+
+ return current
+
+
+def set_once(current: Any, updates: List[Any]) -> Any:
+ """Set the value once and prevent further updates."""
+ # If current already has a value, return it unchanged
+ if current is not None:
+ return current
+
+ # Handle case where updates is None instead of a list
+ if updates is None:
return None
- # Handle case where values is not iterable
- if not hasattr(values, "__iter__"):
- return values
+ # Process updates if it's a list
+ if isinstance(updates, list):
+ for update in updates:
+ if update is not None:
+ return update
+ # Handle case where updates is a single value, not a list
+ elif updates is not None:
+ return updates
- result = None
- for value in values:
- if value is not None:
- result = value
- return result
+ return current
class ProposalEvaluationState(TypedDict):
"""Type definition for the proposal evaluation state."""
- proposal_id: Annotated[str, no_update_reducer] # Read-only during execution
- proposal_data: Annotated[str, no_update_reducer] # Now a string, not a dict
- core_score: Annotated[Optional[Dict[str, Any]], merge_dict_override_fn]
- historical_score: Annotated[Optional[Dict[str, Any]], merge_dict_override_fn]
- financial_score: Annotated[Optional[Dict[str, Any]], merge_dict_override_fn]
- social_score: Annotated[Optional[Dict[str, Any]], merge_dict_override_fn]
- final_score: Annotated[Optional[Dict[str, Any]], merge_dict_override_fn]
- flags: Annotated[List[str], append_list_fn] # Merges lists of flags
- summaries: Annotated[
- Dict[str, str], merge_dict_fn
- ] # Merges dictionaries of summaries
- decision: Annotated[Optional[str], merge_dict_override_fn]
- halt: Annotated[bool, operator.or_] # Use OR for boolean flags
+ proposal_id: Annotated[str, no_update_reducer]
+ proposal_data: Annotated[str, no_update_reducer]
+ core_score: Annotated[Optional[Dict[str, Any]], set_once]
+ historical_score: Annotated[Optional[Dict[str, Any]], set_once]
+ financial_score: Annotated[Optional[Dict[str, Any]], set_once]
+ social_score: Annotated[Optional[Dict[str, Any]], set_once]
+ final_score: Annotated[Optional[Dict[str, Any]], set_once]
+ flags: Annotated[List[str], append_list_fn] # Correctly appends lists
+ summaries: Annotated[Dict[str, str], merge_dicts] # Properly merges dictionaries
+ decision: Annotated[Optional[str], set_once]
+ halt: Annotated[bool, operator.or_]
token_usage: Annotated[
- Dict[str, Dict[str, int]], merge_dict_fn
- ] # Merges nested dictionaries
- core_agent_invocations: Annotated[int, operator.add] # Counts should add
- proposal_images: Annotated[
- Optional[List[Dict]], merge_dict_override_fn
- ] # ADDED: To store encoded images
+ Dict[str, Dict[str, int]], merge_dicts
+ ] # Properly merges dictionaries
+ core_agent_invocations: Annotated[int, operator.add]
+ proposal_images: Annotated[Optional[List[Dict]], set_once]
class AgentOutput(BaseModel):
@@ -143,9 +166,34 @@ def update_state_with_agent_result(
state: ProposalEvaluationState, agent_result: Dict[str, Any], agent_name: str
):
"""Helper function to update state with agent result including summaries and flags."""
+ # ADDED DEBUG: Log the incoming data
+ logger.debug(
+ f"[DEBUG:update_state:{agent_name}] Updating state with agent result: {agent_result}"
+ )
+ logger.debug(
+ f"[DEBUG:update_state:{agent_name}] Current state before update - {agent_name}_score: {state.get(f'{agent_name}_score')}"
+ )
+
# Update agent score in state
if agent_name in ["core", "historical", "financial", "social", "final"]:
- state[f"{agent_name}_score"] = agent_result
+ # Make a copy of agent_result to avoid modifying the original
+ score_dict = dict(agent_result)
+ # Don't pass token_usage through this path to avoid duplication
+ if "token_usage" in score_dict:
+ del score_dict["token_usage"]
+
+ # ADDED DEBUG: Log what we're about to assign
+ logger.debug(
+ f"[DEBUG:update_state:{agent_name}] Setting {agent_name}_score to: {score_dict}"
+ )
+
+ # Directly assign the dictionary to the state key
+ state[f"{agent_name}_score"] = score_dict
+
+ # ADDED DEBUG: Immediately verify what was assigned
+ logger.debug(
+ f"[DEBUG:update_state:{agent_name}] Immediate check - {agent_name}_score now: {state.get(f'{agent_name}_score')}"
+ )
# Update summaries
if "summaries" not in state:
@@ -161,19 +209,13 @@ def update_state_with_agent_result(
if "flags" in agent_result and isinstance(agent_result["flags"], list):
state["flags"].extend(agent_result["flags"])
- # Update token usage
- if (
- "token_usage" in state
- and isinstance(state["token_usage"], dict)
- and f"{agent_name}_agent" in state["token_usage"]
- ):
- # Token usage has been set by the agent directly
- pass
- elif hasattr(agent_result, "get") and agent_result.get("token_usage"):
- # Token usage available in the result
- if "token_usage" not in state:
- state["token_usage"] = {}
- state["token_usage"][f"{agent_name}_agent"] = agent_result.get("token_usage")
+ # Note: Token usage is already directly handled by each agent via state["token_usage"]["{agent_name}_agent"]
+ # So we don't need to do anything with token usage here
+
+ # ADDED DEBUG: Log final state
+ logger.debug(
+ f"[DEBUG:update_state:{agent_name}] Final state after update - {agent_name}_score: {state.get(f'{agent_name}_score')}"
+ )
return state
@@ -351,6 +393,7 @@ async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
"output_tokens": len(result.model_dump_json())
// 4, # rough estimate
"total_tokens": token_count + len(result.model_dump_json()) // 4,
+ "model_name": llm_model_name, # Include model name
}
self.logger.debug(
f"[DEBUG:CoreAgent:{proposal_id}] Estimated token usage: {token_usage_data}"
@@ -362,8 +405,27 @@ async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
state["token_usage"]["core_agent"] = token_usage_data
result_dict = result.model_dump()
+ # Add token usage to result_dict so it's properly processed
+ result_dict["token_usage"] = token_usage_data
+
+ # ADDED DEBUG: Log the exact result dictionary before state update
+ self.logger.debug(
+ f"[DEBUG:CoreAgent:{proposal_id}] BEFORE STATE UPDATE: Result dict to be added to state: {result_dict}"
+ )
+
+ # Capture state before update for debugging
+ self.logger.debug(
+ f"[DEBUG:CoreAgent:{proposal_id}] State before update - core_score: {state.get('core_score')}"
+ )
+
# Update state with the result
update_state_with_agent_result(state, result_dict, "core")
+
+ # ADDED DEBUG: Log the state after update
+ self.logger.debug(
+ f"[DEBUG:CoreAgent:{proposal_id}] AFTER STATE UPDATE: core_score in state: {state.get('core_score')}"
+ )
+
return result_dict
except Exception as e:
self.logger.error(
@@ -527,6 +589,7 @@ async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
"output_tokens": len(result.model_dump_json())
// 4, # rough estimate
"total_tokens": token_count + len(result.model_dump_json()) // 4,
+ "model_name": llm_model_name, # Include model name
}
self.logger.debug(
f"[DEBUG:HistoricalAgent:{proposal_id}] Estimated token usage: {token_usage_data}"
@@ -538,6 +601,9 @@ async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
state["token_usage"]["historical_agent"] = token_usage_data
result_dict = result.model_dump()
+ # Add token usage to result_dict so it's properly processed
+ result_dict["token_usage"] = token_usage_data
+
# Update state with the result
update_state_with_agent_result(state, result_dict, "historical")
return result_dict
@@ -665,6 +731,7 @@ async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
"output_tokens": len(result.model_dump_json())
// 4, # rough estimate
"total_tokens": token_count + len(result.model_dump_json()) // 4,
+ "model_name": llm_model_name, # Include model name
}
self.logger.debug(
f"[DEBUG:FinancialAgent:{proposal_id}] Estimated token usage: {token_usage_data}"
@@ -676,6 +743,9 @@ async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
state["token_usage"]["financial_agent"] = token_usage_data
result_dict = result.model_dump()
+ # Add token usage to result_dict so it's properly processed
+ result_dict["token_usage"] = token_usage_data
+
# Update state with the result
update_state_with_agent_result(state, result_dict, "financial")
return result_dict
@@ -711,7 +781,7 @@ async def process(self, state: ProposalEvaluationState) -> List[Dict[str, Any]]:
self.logger.info(
f"[ImageProcessorNode:{proposal_id}] No proposal_data string, skipping image processing."
)
- return []
+ return [] # Return empty list, not None
self.logger.info(
f"[ImageProcessorNode:{proposal_id}] Starting image processing."
@@ -722,7 +792,7 @@ async def process(self, state: ProposalEvaluationState) -> List[Dict[str, Any]]:
self.logger.info(
f"[ImageProcessorNode:{proposal_id}] No image URLs found in proposal data."
)
- return []
+ return [] # Return empty list, not None
self.logger.info(
f"[ImageProcessorNode:{proposal_id}] Found {len(image_urls)} image URLs: {image_urls}"
@@ -778,7 +848,7 @@ async def process(self, state: ProposalEvaluationState) -> List[Dict[str, Any]]:
self.logger.info(
f"[ImageProcessorNode:{proposal_id}] Finished. {len(processed_images)} images processed."
)
- return processed_images
+ return processed_images # This will be a list, possibly empty
class SocialContextAgent(BaseCapabilityMixin, WebSearchCapability):
@@ -933,6 +1003,7 @@ async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
"output_tokens": len(result.model_dump_json())
// 4, # rough estimate
"total_tokens": token_count + len(result.model_dump_json()) // 4,
+ "model_name": llm_model_name, # Include model name
}
self.logger.debug(
f"[DEBUG:SocialAgent:{proposal_id}] Estimated token usage: {token_usage_data}"
@@ -944,6 +1015,9 @@ async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
state["token_usage"]["social_agent"] = token_usage_data
result_dict = result.model_dump()
+ # Add token usage to result_dict so it's properly processed
+ result_dict["token_usage"] = token_usage_data
+
# Update state with the result
update_state_with_agent_result(state, result_dict, "social")
return result_dict
@@ -1150,6 +1224,7 @@ def safe_get_score(value, default=0):
"output_tokens": len(result.model_dump_json())
// 4, # rough estimate
"total_tokens": token_count + len(result.model_dump_json()) // 4,
+ "model_name": llm_model_name, # Include model name
}
self.logger.debug(
f"[DEBUG:ReasoningAgent:{proposal_id}] Estimated token usage: {token_usage_data}"
@@ -1161,6 +1236,9 @@ def safe_get_score(value, default=0):
state["token_usage"]["reasoning_agent"] = token_usage_data
result_dict = result.model_dump()
+ # Add token usage to result_dict so it's properly processed
+ result_dict["token_usage"] = token_usage_data
+
# Update state with the result
update_state_with_agent_result(state, result_dict, "reasoning")
return result_dict
@@ -1434,6 +1512,28 @@ async def evaluate_proposal(
f"[DEBUG:Workflow:{proposal_id}] Workflow execution completed with decision: {result.get('decision', 'Unknown')}"
)
+ # ADDED DEBUG: More comprehensive logging of result structure
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] RESULT STRUCTURE: {list(result.keys())}"
+ )
+
+ # ADDED DEBUG: Log full core_score and other scores
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] FULL CORE SCORE: {result.get('core_score')}"
+ )
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] FULL HISTORICAL SCORE: {result.get('historical_score')}"
+ )
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] FULL FINANCIAL SCORE: {result.get('financial_score')}"
+ )
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] FULL SOCIAL SCORE: {result.get('social_score')}"
+ )
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] FULL FINAL SCORE: {result.get('final_score')}"
+ )
+
logger.debug(f"[DEBUG:Workflow:{proposal_id}] RESULT SCORES TYPES:")
logger.debug(
f"[DEBUG:Workflow:{proposal_id}] - Core: {type(result.get('core_score'))} = {repr(result.get('core_score'))}"
@@ -1474,15 +1574,36 @@ async def evaluate_proposal(
}
def safe_extract_score(value, default=0):
+ # ADDED DEBUG: Log what we're trying to extract
+ logger.debug(
+ f"[DEBUG:safe_extract_score] Extracting score from: {repr(value)} (type: {type(value)})"
+ )
+
if isinstance(value, dict) and "score" in value:
- return value.get("score", default)
+ score_val = value.get("score", default)
+ logger.debug(
+ f"[DEBUG:safe_extract_score] Found score in dict: {score_val}"
+ )
+ return score_val
elif isinstance(value, int):
+ logger.debug(
+ f"[DEBUG:safe_extract_score] Value is already int: {value}"
+ )
return value
elif isinstance(value, str):
+ logger.debug(f"[DEBUG:safe_extract_score] Value is string: '{value}'")
try:
- return int(value)
+ int_val = int(value)
+ logger.debug(
+ f"[DEBUG:safe_extract_score] Converted string to int: {int_val}"
+ )
+ return int_val
except ValueError:
+ logger.debug(
+ f"[DEBUG:safe_extract_score] Could not convert string to int"
+ )
pass # If string is not int, will fall through to default
+ logger.debug(f"[DEBUG:safe_extract_score] Using default: {default}")
return default
final_score_val = result.get("final_score")
@@ -1541,11 +1662,13 @@ def safe_extract_score(value, default=0):
# Aggregate tokens from all agent steps
# Assuming model_name is consistent across all steps for this aggregation, or we use the primary model_name
# If each agent could use a different model, this would need more detailed per-model tracking
+ logger.debug(f"Token usage entries in result: {list(total_token_usage.keys())}")
for agent_key, usage_data in total_token_usage.items():
if isinstance(usage_data, dict):
total_input_tokens += usage_data.get("input_tokens", 0)
total_output_tokens += usage_data.get("output_tokens", 0)
total_tokens += usage_data.get("total_tokens", 0)
+ logger.debug(f"Token usage for {agent_key}: {usage_data}")
else:
logger.warning(
f"Unexpected format for token_usage data for agent {agent_key}: {usage_data}"
@@ -1573,8 +1696,15 @@ def safe_extract_score(value, default=0):
# Extract token usage by model from token_usage data
for agent_name, agent_usage in total_token_usage.items():
if isinstance(agent_usage, dict) and agent_usage.get("total_tokens", 0) > 0:
- # Use default model name if not specified
- model_name = "gpt-4.1" # default model name
+ # Get model name from config, or use default
+ model_name = config.get(
+ "model_name", "gpt-4.1"
+ ) # Use configured model name
+
+ # Extract model name from each agent usage if available
+ # This would require each agent to include model info in their token usage
+ if "model_name" in agent_usage:
+ model_name = agent_usage["model_name"]
# Initialize the model entry if needed
if model_name not in total_token_usage_by_model:
@@ -1648,7 +1778,7 @@ def safe_extract_score(value, default=0):
"component_scores": component_scores,
"component_summaries": component_summaries, # Include component summaries
"flags": all_flags,
- "token_usage": total_token_usage,
+ "token_usage": total_token_usage, # Include all token usage details
"web_search_results": [],
"treasury_balance": None,
"web_search_token_usage": {
@@ -1661,12 +1791,17 @@ def safe_extract_score(value, default=0):
"output_tokens": total_output_tokens,
"total_tokens": total_tokens,
},
- "evaluation_model_info": {"name": "gpt-4.1", "temperature": 0.1},
- "web_search_model_info": {"name": "gpt-4.1", "temperature": 0.1},
+ "evaluation_model_info": {
+ "name": config.get("model_name", "gpt-4.1"),
+ "temperature": config.get("temperature", 0.1),
+ },
+ "web_search_model_info": {
+ "name": config.get("model_name", "gpt-4.1"),
+ "temperature": config.get("temperature", 0.1),
+ },
"total_token_usage_by_model": total_token_usage_by_model,
"total_cost_by_model": total_cost_by_model,
"total_overall_cost": total_overall_cost,
- "summaries": component_summaries,
}
logger.debug(
@@ -1901,11 +2036,13 @@ async def evaluate_and_vote_on_proposal(
# Aggregate tokens from all agent steps
# Assuming model_name is consistent across all steps for this aggregation, or we use the primary model_name
# If each agent could use a different model, this would need more detailed per-model tracking
+ logger.debug(f"Token usage entries in result: {list(total_token_usage.keys())}")
for agent_key, usage_data in total_token_usage.items():
if isinstance(usage_data, dict):
total_input_tokens += usage_data.get("input_tokens", 0)
total_output_tokens += usage_data.get("output_tokens", 0)
total_tokens += usage_data.get("total_tokens", 0)
+ logger.debug(f"Token usage for {agent_key}: {usage_data}")
else:
logger.warning(
f"Unexpected format for token_usage data for agent {agent_key}: {usage_data}"
@@ -1974,11 +2111,12 @@ async def evaluate_and_vote_on_proposal(
"tx_id": tx_id,
"vector_results": [],
"recent_tweets": [],
- "web_search_results": [],
+ "web_search_results": eval_result.get("web_search_results", []),
"treasury_balance": treasury_balance,
"component_scores": eval_result.get("component_scores", {}),
- "component_summaries": eval_result.get("summaries", {}),
+ "component_summaries": eval_result.get("component_summaries", {}),
"flags": eval_result.get("flags", []),
+ "token_usage": total_token_usage, # Pass the complete token_usage dictionary
"web_search_token_usage": {
"input_tokens": web_search_input_tokens,
"output_tokens": web_search_output_tokens,
From 6aee5b18b7a6779b2e95c2bf313a1657e19404d0 Mon Sep 17 00:00:00 2001
From: human058382928 <162091348+human058382928@users.noreply.github.com>
Date: Fri, 9 May 2025 17:06:28 -0700
Subject: [PATCH 3/5] update
---
services/workflows/proposal_evaluation.py | 267 +++++++---------------
1 file changed, 85 insertions(+), 182 deletions(-)
diff --git a/services/workflows/proposal_evaluation.py b/services/workflows/proposal_evaluation.py
index a70dbb48..46d3ac98 100644
--- a/services/workflows/proposal_evaluation.py
+++ b/services/workflows/proposal_evaluation.py
@@ -166,12 +166,9 @@ def update_state_with_agent_result(
state: ProposalEvaluationState, agent_result: Dict[str, Any], agent_name: str
):
"""Helper function to update state with agent result including summaries and flags."""
- # ADDED DEBUG: Log the incoming data
+ # Simplified logging - just log once with relevant details
logger.debug(
- f"[DEBUG:update_state:{agent_name}] Updating state with agent result: {agent_result}"
- )
- logger.debug(
- f"[DEBUG:update_state:{agent_name}] Current state before update - {agent_name}_score: {state.get(f'{agent_name}_score')}"
+ f"[DEBUG:update_state:{agent_name}] Updating state with {agent_name}_score (score: {agent_result.get('score', 'N/A')})"
)
# Update agent score in state
@@ -182,19 +179,9 @@ def update_state_with_agent_result(
if "token_usage" in score_dict:
del score_dict["token_usage"]
- # ADDED DEBUG: Log what we're about to assign
- logger.debug(
- f"[DEBUG:update_state:{agent_name}] Setting {agent_name}_score to: {score_dict}"
- )
-
# Directly assign the dictionary to the state key
state[f"{agent_name}_score"] = score_dict
- # ADDED DEBUG: Immediately verify what was assigned
- logger.debug(
- f"[DEBUG:update_state:{agent_name}] Immediate check - {agent_name}_score now: {state.get(f'{agent_name}_score')}"
- )
-
# Update summaries
if "summaries" not in state:
state["summaries"] = {}
@@ -212,11 +199,6 @@ def update_state_with_agent_result(
# Note: Token usage is already directly handled by each agent via state["token_usage"]["{agent_name}_agent"]
# So we don't need to do anything with token usage here
- # ADDED DEBUG: Log final state
- logger.debug(
- f"[DEBUG:update_state:{agent_name}] Final state after update - {agent_name}_score: {state.get(f'{agent_name}_score')}"
- )
-
return state
@@ -408,24 +390,9 @@ async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
# Add token usage to result_dict so it's properly processed
result_dict["token_usage"] = token_usage_data
- # ADDED DEBUG: Log the exact result dictionary before state update
- self.logger.debug(
- f"[DEBUG:CoreAgent:{proposal_id}] BEFORE STATE UPDATE: Result dict to be added to state: {result_dict}"
- )
-
- # Capture state before update for debugging
- self.logger.debug(
- f"[DEBUG:CoreAgent:{proposal_id}] State before update - core_score: {state.get('core_score')}"
- )
-
- # Update state with the result
+        # Update state with the core agent result
update_state_with_agent_result(state, result_dict, "core")
- # ADDED DEBUG: Log the state after update
- self.logger.debug(
- f"[DEBUG:CoreAgent:{proposal_id}] AFTER STATE UPDATE: core_score in state: {state.get('core_score')}"
- )
-
return result_dict
except Exception as e:
self.logger.error(
@@ -1464,14 +1431,8 @@ async def evaluate_proposal(
debug_level = 0
if config and "debug_level" in config:
debug_level = config.get("debug_level", 0)
- logger.debug(f"[PROPOSAL_DEBUG] Using debug_level: {debug_level}")
-
- logger.debug(
- f"[PROPOSAL_DEBUG] evaluate_proposal received proposal_id: {proposal_id}"
- )
- logger.debug(
- f"[PROPOSAL_DEBUG] evaluate_proposal received proposal_data type: {type(proposal_data)}"
- )
+ if debug_level > 0:
+ logger.debug(f"[PROPOSAL_DEBUG] Using debug_level: {debug_level}")
if not proposal_data:
logger.warning(
@@ -1495,13 +1456,6 @@ async def evaluate_proposal(
"recursion_count": 0,
}
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] Initialized workflow state with keys: {state.keys()}"
- )
- logger.debug(
- f"[PROPOSAL_DEBUG] Proposal data in state: {state.get('proposal_data')}"
- )
-
try:
workflow = ProposalEvaluationWorkflow(config or {})
logger.info(
@@ -1512,47 +1466,30 @@ async def evaluate_proposal(
f"[DEBUG:Workflow:{proposal_id}] Workflow execution completed with decision: {result.get('decision', 'Unknown')}"
)
- # ADDED DEBUG: More comprehensive logging of result structure
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] RESULT STRUCTURE: {list(result.keys())}"
- )
-
- # ADDED DEBUG: Log full core_score and other scores
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] FULL CORE SCORE: {result.get('core_score')}"
- )
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] FULL HISTORICAL SCORE: {result.get('historical_score')}"
- )
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] FULL FINANCIAL SCORE: {result.get('financial_score')}"
- )
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] FULL SOCIAL SCORE: {result.get('social_score')}"
- )
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] FULL FINAL SCORE: {result.get('final_score')}"
- )
-
- logger.debug(f"[DEBUG:Workflow:{proposal_id}] RESULT SCORES TYPES:")
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] - Core: {type(result.get('core_score'))} = {repr(result.get('core_score'))}"
- )
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] - Historical: {type(result.get('historical_score'))} = {repr(result.get('historical_score'))}"
- )
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] - Financial: {type(result.get('financial_score'))} = {repr(result.get('financial_score'))}"
- )
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] - Social: {type(result.get('social_score'))} = {repr(result.get('social_score'))}"
- )
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] - Final: {type(result.get('final_score'))} = {repr(result.get('final_score'))}"
- )
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] - Decision: {type(result.get('decision'))} = {repr(result.get('decision'))}"
- )
+ # Only output detailed debug info at higher debug levels
+ if debug_level >= 2:
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] RESULT STRUCTURE: {list(result.keys())}"
+ )
+ logger.debug(f"[DEBUG:Workflow:{proposal_id}] RESULT SCORES TYPES:")
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] - Core: {type(result.get('core_score'))} = {repr(result.get('core_score'))[:100]+'...' if len(repr(result.get('core_score'))) > 100 else repr(result.get('core_score'))}"
+ )
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] - Historical: {type(result.get('historical_score'))} = {repr(result.get('historical_score'))[:100]+'...' if len(repr(result.get('historical_score'))) > 100 else repr(result.get('historical_score'))}"
+ )
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] - Financial: {type(result.get('financial_score'))} = {repr(result.get('financial_score'))[:100]+'...' if len(repr(result.get('financial_score'))) > 100 else repr(result.get('financial_score'))}"
+ )
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] - Social: {type(result.get('social_score'))} = {repr(result.get('social_score'))[:100]+'...' if len(repr(result.get('social_score'))) > 100 else repr(result.get('social_score'))}"
+ )
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] - Final: {type(result.get('final_score'))} = {repr(result.get('final_score'))[:100]+'...' if len(repr(result.get('final_score'))) > 100 else repr(result.get('final_score'))}"
+ )
+ logger.debug(
+ f"[DEBUG:Workflow:{proposal_id}] - Decision: {type(result.get('decision'))} = {repr(result.get('decision'))}"
+ )
if result is None:
logger.error(
@@ -1574,43 +1511,18 @@ async def evaluate_proposal(
}
def safe_extract_score(value, default=0):
- # ADDED DEBUG: Log what we're trying to extract
- logger.debug(
- f"[DEBUG:safe_extract_score] Extracting score from: {repr(value)} (type: {type(value)})"
- )
-
if isinstance(value, dict) and "score" in value:
- score_val = value.get("score", default)
- logger.debug(
- f"[DEBUG:safe_extract_score] Found score in dict: {score_val}"
- )
- return score_val
+ return value.get("score", default)
elif isinstance(value, int):
- logger.debug(
- f"[DEBUG:safe_extract_score] Value is already int: {value}"
- )
return value
elif isinstance(value, str):
- logger.debug(f"[DEBUG:safe_extract_score] Value is string: '{value}'")
try:
- int_val = int(value)
- logger.debug(
- f"[DEBUG:safe_extract_score] Converted string to int: {int_val}"
- )
- return int_val
+ return int(value)
except ValueError:
- logger.debug(
- f"[DEBUG:safe_extract_score] Could not convert string to int"
- )
pass # If string is not int, will fall through to default
- logger.debug(f"[DEBUG:safe_extract_score] Using default: {default}")
return default
final_score_val = result.get("final_score")
- logger.debug(
- f"[DEBUG:evaluate_proposal] Raw final_score_val from result state: {repr(final_score_val)} (type: {type(final_score_val)})"
- )
-
final_score_dict = {}
if isinstance(final_score_val, dict):
final_score_dict = final_score_val
@@ -1622,6 +1534,7 @@ def safe_extract_score(value, default=0):
"social": safe_extract_score(result.get("social_score")),
}
+ # This is a useful log to keep even at lower debug levels
logger.debug(
f"[DEBUG:Workflow:{proposal_id}] EXTRACTED COMPONENT SCORES: {component_scores}"
)
@@ -2028,77 +1941,62 @@ async def evaluate_and_vote_on_proposal(
"data": None,
}
+ # Get token usage data from eval_result
total_token_usage = eval_result.get("token_usage", {})
total_input_tokens = 0
total_output_tokens = 0
total_tokens = 0
- # Aggregate tokens from all agent steps
- # Assuming model_name is consistent across all steps for this aggregation, or we use the primary model_name
- # If each agent could use a different model, this would need more detailed per-model tracking
- logger.debug(f"Token usage entries in result: {list(total_token_usage.keys())}")
+ # Aggregate tokens from all agent steps - no need to log duplicates here
for agent_key, usage_data in total_token_usage.items():
if isinstance(usage_data, dict):
total_input_tokens += usage_data.get("input_tokens", 0)
total_output_tokens += usage_data.get("output_tokens", 0)
total_tokens += usage_data.get("total_tokens", 0)
- logger.debug(f"Token usage for {agent_key}: {usage_data}")
- else:
- logger.warning(
- f"Unexpected format for token_usage data for agent {agent_key}: {usage_data}"
- )
-
-        # Web search token usage is not tracked separately; report zeroed placeholders.
- web_search_input_tokens = 0
- web_search_output_tokens = 0
- web_search_total_tokens = 0
-
- # Initialize total_token_usage_by_model
- total_token_usage_by_model = {}
- # Use the default model name from settings or default to gpt-4.1
- default_model = model_name or "gpt-4.1"
-
- # Add total token counts to the model
- total_token_usage_by_model[default_model] = {
- "input_tokens": total_input_tokens,
- "output_tokens": total_output_tokens,
- "total_tokens": total_tokens,
- }
-
- # Improved cost calculation by model
- cost_per_thousand = {
- "gpt-4.1": 0.01, # $0.01 per 1K tokens
- "gpt-4.1-mini": 0.005, # $0.005 per 1K tokens
- "gpt-4.1-32k": 0.03, # $0.03 per 1K tokens
- "gpt-4": 0.03, # $0.03 per 1K tokens
- "gpt-4-32k": 0.06, # $0.06 per 1K tokens
- "gpt-3.5-turbo": 0.0015, # $0.0015 per 1K tokens
- "default": 0.01, # default fallback
- }
+ # Initialize total_token_usage_by_model using data from eval_result
+ total_token_usage_by_model = eval_result.get("total_token_usage_by_model", {})
+ if not total_token_usage_by_model:
+ # Use the default model name from settings or default to gpt-4.1
+ default_model = model_name or "gpt-4.1"
+ # Add total token counts to the model
+ total_token_usage_by_model[default_model] = {
+ "input_tokens": total_input_tokens,
+ "output_tokens": total_output_tokens,
+ "total_tokens": total_tokens,
+ }
- # Calculate costs for each model
- total_cost_by_model = {}
- total_overall_cost = 0.0
- for model_key, usage in total_token_usage_by_model.items():
- # Get cost per 1K tokens for this model
- model_cost_per_k = cost_per_thousand.get(
- model_key, cost_per_thousand["default"]
- )
- # Calculate cost for this model's usage
- model_cost = usage["total_tokens"] * (model_cost_per_k / 1000)
- total_cost_by_model[model_key] = model_cost
- total_overall_cost += model_cost
+ # Get cost calculations from eval_result if available
+ total_cost_by_model = eval_result.get("total_cost_by_model", {})
+ total_overall_cost = eval_result.get("total_overall_cost", 0.0)
+ # If cost data is missing, calculate it
if not total_cost_by_model:
- # Fallback if no models were recorded
- default_model_key = "gpt-4.1" # Default model name
- total_cost_by_model[default_model_key] = total_tokens * (
- cost_per_thousand["default"] / 1000
- )
- total_overall_cost = total_cost_by_model[default_model_key]
+ # Improved cost calculation by model
+ cost_per_thousand = {
+ "gpt-4.1": 0.01, # $0.01 per 1K tokens
+ "gpt-4.1-mini": 0.005, # $0.005 per 1K tokens
+ "gpt-4.1-32k": 0.03, # $0.03 per 1K tokens
+ "gpt-4": 0.03, # $0.03 per 1K tokens
+ "gpt-4-32k": 0.06, # $0.06 per 1K tokens
+ "gpt-3.5-turbo": 0.0015, # $0.0015 per 1K tokens
+ "default": 0.01, # default fallback
+ }
+ # Calculate costs for each model
+ total_cost_by_model = {}
+ total_overall_cost = 0.0
+ for model_key, usage in total_token_usage_by_model.items():
+ # Get cost per 1K tokens for this model
+ model_cost_per_k = cost_per_thousand.get(
+ model_key, cost_per_thousand["default"]
+ )
+ # Calculate cost for this model's usage
+ model_cost = usage["total_tokens"] * (model_cost_per_k / 1000)
+ total_cost_by_model[model_key] = model_cost
+ total_overall_cost += model_cost
+
+ # Construct final result with voting information added
final_result = {
"success": True,
"evaluation": {
@@ -2116,12 +2014,15 @@ async def evaluate_and_vote_on_proposal(
"component_scores": eval_result.get("component_scores", {}),
"component_summaries": eval_result.get("component_summaries", {}),
"flags": eval_result.get("flags", []),
- "token_usage": total_token_usage, # Pass the complete token_usage dictionary
- "web_search_token_usage": {
- "input_tokens": web_search_input_tokens,
- "output_tokens": web_search_output_tokens,
- "total_tokens": web_search_total_tokens,
- },
+ "token_usage": total_token_usage,
+ "web_search_token_usage": eval_result.get(
+ "web_search_token_usage",
+ {
+ "input_tokens": 0,
+ "output_tokens": 0,
+ "total_tokens": 0,
+ },
+ ),
"evaluation_token_usage": {
"input_tokens": total_input_tokens,
"output_tokens": total_output_tokens,
@@ -2134,8 +2035,9 @@ async def evaluate_and_vote_on_proposal(
"total_overall_cost": total_overall_cost,
}
+ # Single log entry about the final result instead of duplicating token usage logs
logger.debug(
- f"Proposal evaluation completed: Success={final_result['success']} | Decision={'APPROVE' if approve else 'REJECT'} | Confidence={confidence_score:.2f} | Auto-voted={should_vote} | Transaction={tx_id or 'None'}"
+ f"Proposal evaluation completed with voting: Decision={'APPROVE' if approve else 'REJECT'} | Confidence={confidence_score:.2f} | Auto-voted={should_vote} | Transaction={tx_id or 'None'}"
)
return final_result
except Exception as e:
@@ -2166,6 +2068,7 @@ async def evaluate_proposal_only(
auto_vote=False,
)
+ # Simplified logging - no need to duplicate what evaluate_and_vote_on_proposal already logged
logger.debug("Removing vote-related fields from response")
if "vote_result" in result:
del result["vote_result"]
From 8e641f630ce7d01cc67a4c46a5fbb0881e37d0bc Mon Sep 17 00:00:00 2001
From: human058382928 <162091348+human058382928@users.noreply.github.com>
Date: Sat, 10 May 2025 17:55:36 -0700
Subject: [PATCH 4/5] update
---
examples/proposal_evaluation_example.py | 90 +-
plan.md | 556 ++++
services/workflows/README.md | 66 +
services/workflows/agents/__init__.py | 0
services/workflows/agents/core_context.py | 144 ++
.../workflows/agents/financial_context.py | 130 +
.../workflows/agents/historical_context.py | 159 ++
services/workflows/agents/image_processing.py | 95 +
services/workflows/agents/reasoning.py | 297 +++
services/workflows/agents/social_context.py | 209 ++
services/workflows/base.py | 59 +-
services/workflows/proposal_evaluation.py | 2273 +++--------------
services/workflows/utils/__init__.py | 0
services/workflows/utils/models.py | 31 +
services/workflows/utils/state_reducers.py | 139 +
services/workflows/utils/token_usage.py | 64 +
services/workflows/web_search_mixin.py | 4 +-
17 files changed, 2369 insertions(+), 1947 deletions(-)
create mode 100644 plan.md
create mode 100644 services/workflows/README.md
create mode 100644 services/workflows/agents/__init__.py
create mode 100644 services/workflows/agents/core_context.py
create mode 100644 services/workflows/agents/financial_context.py
create mode 100644 services/workflows/agents/historical_context.py
create mode 100644 services/workflows/agents/image_processing.py
create mode 100644 services/workflows/agents/reasoning.py
create mode 100644 services/workflows/agents/social_context.py
create mode 100644 services/workflows/utils/__init__.py
create mode 100644 services/workflows/utils/models.py
create mode 100644 services/workflows/utils/state_reducers.py
create mode 100644 services/workflows/utils/token_usage.py
diff --git a/examples/proposal_evaluation_example.py b/examples/proposal_evaluation_example.py
index 45615822..61cae05f 100644
--- a/examples/proposal_evaluation_example.py
+++ b/examples/proposal_evaluation_example.py
@@ -35,7 +35,62 @@ async def create_test_proposal(dao_id: UUID) -> UUID:
The ID of the created proposal
"""
# Create test parameters as a JSON object
- parameters = "I Publius.btc will do a $FACES airdrop to as many bitcoin faces holders as possible. I will report back with a confirmation message and proof. Give me a shot."
+ # parameters = "I Publius.btc will do a $FACES airdrop to as many bitcoin faces holders as possible. I will report back with a confirmation message and proof. Give me a shot."
+ parameters = """
+
+Proposal Title: $FACES Airdrop to Bitcoin Faces Holders with Transparent Execution and Community Engagement
+
+Proposal ID: [Generate a new UUID for submission]
+
+Proposer: Publius.btc
+
+Proposal Data:
+I, Publius.btc, propose to execute a $FACES airdrop to Bitcoin Faces holders to boost community engagement and reward active participants in the DAO. The airdrop will distribute 10,000 $FACES tokens to eligible holders, with a clear execution plan, transparent verification, and measurable outcomes. The proposal aligns with the DAO’s mission to promote community activity and token utility. Below are the details:
+
+Objective: Distribute $FACES tokens to Bitcoin Faces holders to incentivize participation, increase governance engagement, and strengthen community ties.
+Eligibility Criteria:
+Holders of Bitcoin Faces NFTs as of a snapshot date (to be set 7 days after proposal approval).
+Minimum holding: 1 Bitcoin Faces NFT.
+Exclusion: Wallets flagged for suspicious activity (e.g., wash trading) based on on-chain analysis.
+Execution Plan:
+Snapshot: Conduct a blockchain snapshot of Bitcoin Faces holders on the specified date, using a third-party tool (e.g., Etherscan or equivalent for Bitcoin-based assets).
+Distribution: Distribute 10 $FACES per eligible wallet, up to a total of 10,000 tokens, via a smart contract to ensure transparency and immutability.
+Timeline:
+Day 1–7: Proposal approval and snapshot preparation.
+Day 8: Snapshot execution.
+Day 9–14: Smart contract deployment and testing.
+Day 15: Airdrop distribution.
+Day 20: Post-airdrop report published.
+Budget and Funding:
+Total Cost: 10,000 $FACES tokens (valued at $0.10 per token based on current market price, totaling $1,000).
+Additional Costs: $500 for smart contract development, auditing, and gas fees, to be funded from the DAO treasury.
+Funding Request: 10,000 $FACES tokens + $500 in stablecoins (e.g., USDC) from the DAO treasury.
+Cost Justification: The airdrop is cost-effective, targeting active holders to maximize engagement with minimal token dilution. The $500 covers secure execution to mitigate risks.
+Verification and Transparency:
+Publish the snapshot data and eligible wallet list on the DAO’s governance forum.
+Share the smart contract address and transaction hashes on-chain for public verification.
+Provide a detailed post-airdrop report within 5 days of distribution, including the number of wallets reached, tokens distributed, and community feedback.
+Community Benefit:
+Inclusivity: All Bitcoin Faces holders are eligible, ensuring broad participation.
+Engagement: The airdrop will encourage holders to participate in governance and DAO activities, addressing low governance participation.
+Stakeholder Consideration: The plan includes outreach to diverse community segments via the DAO’s social channels (e.g., Discord, X) to ensure awareness and feedback.
+Alignment with DAO Priorities:
+Promotes token utility and community engagement, core to the DAO’s mission.
+Supports financial prudence by capping costs and providing ROI through increased governance participation (measurable via voting turnout post-airdrop).
+Risk Mitigation:
+Financial Risk: Limited to 10,000 $FACES and $500, with no ongoing costs.
+Execution Risk: Smart contract audit to prevent vulnerabilities.
+Inclusion Risk: Transparent eligibility criteria to avoid disputes.
+Deliverables and ROI:
+Deliverables: Snapshot data, smart contract, airdrop distribution, and post-airdrop report.
+ROI: Expected 10% increase in governance participation (based on similar airdrop campaigns) and enhanced community sentiment, measurable via forum activity and X posts.
+Addressing Past Concerns:
+Unlike previous proposals, this includes a detailed execution plan, budget, and verification process.
+Responds to feedback on inclusion by defining clear eligibility and outreach strategies.
+Aligns with financial priorities by justifying costs and capping token usage.
+Commitment:
+I will execute the airdrop as outlined, provide regular updates on the DAO’s governance forum, and deliver a comprehensive report with proof of distribution. If the proposal is approved, I will collaborate with the DAO’s technical and community teams to ensure success.
+"""
# # Convert parameters to JSON string and then hex encode it
# parameters_hex = "0x" + binascii.hexlify(parameters.encode("utf-8")).decode("utf-8")
@@ -145,28 +200,21 @@ async def test_proposal_evaluation_workflow():
# Print the results
print("\nEvaluation Results:")
- print(f"Success: {result['success']}")
- if result["success"]:
- print(f"Approval: {result['evaluation']['approve']}")
- print(f"Confidence: {result['evaluation']['confidence_score']}")
- print(f"Reasoning: {result['evaluation']['reasoning']}")
- print(
- f"Total Token Usage by Model: {result.get('total_token_usage_by_model')}"
- )
- print(f"Total Cost by Model: {result.get('total_cost_by_model')}")
- print(
- f"Total Overall Cost: ${result.get('total_overall_cost', 0.0):.4f}"
- )
+ print(f"Approval: {result['evaluation'].get('approve', False)}")
+ print(f"Confidence: {result['evaluation'].get('confidence_score', 0)}")
+ print(
+ f"Reasoning: {result['evaluation'].get('reasoning', 'No reasoning provided')}"
+ )
- if scenario["auto_vote"]:
- print(f"Auto-voted: {result['auto_voted']}")
- if result["vote_result"]:
- print(f"Vote Result: {result['vote_result']}")
- if result.get("tx_id"):
- print(f"Transaction ID: {result['tx_id']}")
- else:
- print(f"Error: {result.get('error', 'Unknown error')}")
+ if "token_usage" in result.get("evaluation", {}):
+ print(f"Total Token Usage: {result['evaluation']['token_usage']}")
+ if scenario["auto_vote"]:
+ print(f"Auto-voted: {result.get('auto_voted', False)}")
+ if result.get("vote_result"):
+ print(f"Vote Result: {result['vote_result']}")
+ if result.get("tx_id"):
+ print(f"Transaction ID: {result['tx_id']}")
except Exception as e:
print(f"Error in scenario {scenario['name']}: {e}")
diff --git a/plan.md b/plan.md
new file mode 100644
index 00000000..6a7ae99d
--- /dev/null
+++ b/plan.md
@@ -0,0 +1,556 @@
+This plan refines the earlier recommendations for reorganizing `services/workflows/proposal_evaluation.py` so they align with the project's existing directory structure. It proposes where to place the split files while addressing the modularity and maintainability concerns of the current monolithic file, and keeps compatibility with the existing paths.
+
+---
+
+### Revised Recommendations for Reorganizing `services/workflows/proposal_evaluation.py`
+
+The `proposal_evaluation.py` file is a large, monolithic file containing multiple agent classes, state management utilities, and workflow orchestration logic. To improve modularity, readability, and maintainability, I recommend splitting it into several files within the existing `services/workflows/` directory and creating a new `services/workflows/agents/` subdirectory for agent-specific logic. Since the project already has a `services/agents/` directory, I’ll evaluate whether to use it or keep agents under `services/workflows/agents/` to maintain workflow-specific context.
+
+#### Goals
+1. **Modularize Agent Logic**: Move each agent (`CoreContextAgent`, `HistoricalContextAgent`, etc.) into separate files to reduce file size and improve maintainability.
+2. **Centralize Utilities**: Extract state reducers and image processing logic to shared utility modules, leveraging existing `lib/utils.py` or a new `services/workflows/utils/` directory.
+3. **Leverage Existing Structure**: Integrate with existing directories like `services/workflows/`, `tools/`, and `lib/` to avoid redundant restructuring.
+4. **Abstract Repeated Code**: Address duplicated token usage tracking and image handling logic with mixins or helper functions.
+5. **Maintain Compatibility**: Ensure imports align with existing modules like `services/workflows/capability_mixins.py`, `tools/tools_factory.py`, and `lib/utils.py`.
+
+#### Proposed Directory Structure Changes
+Given the existing structure, I propose the following additions and modifications:
+
+```
+services/
+├── workflows/
+│ ├── __init__.py
+│ ├── agents/ # New subdirectory for workflow-specific agents
+│ │ ├── __init__.py
+│ │ ├── core_context.py # CoreContextAgent
+│ │ ├── historical_context.py # HistoricalContextAgent
+│ │ ├── financial_context.py # FinancialContextAgent
+│ │ ├── social_context.py # SocialContextAgent
+│ │ ├── reasoning.py # ReasoningAgent
+│ │ └── image_processing.py # ImageProcessingNode
+│ ├── utils/ # New subdirectory for workflow utilities
+│ │ ├── __init__.py
+│ │ ├── state_reducers.py # State reducers (no_update_reducer, merge_dicts, set_once)
+│ │ └── token_usage.py # TokenUsageMixin for token tracking
+│ ├── base.py # Already exists, keep BaseWorkflow
+│ ├── capability_mixins.py # Already exists, keep BaseCapabilityMixin
+│ ├── hierarchical_workflows.py # Already exists, keep HierarchicalTeamWorkflow
+│ ├── proposal_evaluation.py # Keep, but slim down to workflow orchestration
+│ └── ... # Other existing workflow files
+```
+
+#### Why Not Use `services/agents/`?
+The existing `services/agents/` directory might seem like a natural place for agent classes. However, since `proposal_evaluation.py` is tightly coupled with the `services/workflows/` module (e.g., it extends `BaseWorkflow` and uses `HierarchicalTeamWorkflow`), keeping agents under `services/workflows/agents/` ensures they remain in the workflow context. The `services/agents/` directory could be reserved for more generic or cross-workflow agents, but if you prefer to consolidate all agents there, I can adjust the recommendation accordingly.
+
+#### File Breakdown
+1. **`services/workflows/agents/core_context.py`**: Contains `CoreContextAgent` class.
+2. **`services/workflows/agents/historical_context.py`**: Contains `HistoricalContextAgent` class.
+3. **`services/workflows/agents/financial_context.py`**: Contains `FinancialContextAgent` class.
+4. **`services/workflows/agents/social_context.py`**: Contains `SocialContextAgent` class.
+5. **`services/workflows/agents/reasoning.py`**: Contains `ReasoningAgent` class.
+6. **`services/workflows/agents/image_processing.py`**: Contains `ImageProcessingNode` class, which handles image extraction and encoding.
+7. **`services/workflows/utils/state_reducers.py`**: Contains state reducer functions (`no_update_reducer`, `merge_dicts`, `set_once`) and the `update_state_with_agent_result` helper.
+8. **`services/workflows/utils/token_usage.py`**: Defines a `TokenUsageMixin` to handle repeated token usage tracking logic.
+9. **`services/workflows/proposal_evaluation.py`**: Slimmed down to include only the `ProposalEvaluationWorkflow` class, `evaluate_proposal`, `get_proposal_evaluation_tools`, `evaluate_and_vote_on_proposal`, and `evaluate_proposal_only` functions.
+10. **Shared Models**: Move `ProposalEvaluationState`, `ProposalEvaluationOutput`, `AgentOutput`, and `FinalOutput` to a shared models file, potentially `backend/models.py` (since it already exists) or a new `services/workflows/models.py`.
+
+---
+
+### Detailed Changes
+
+#### 1. Move Agent Classes to `services/workflows/agents/`
+Each agent (`CoreContextAgent`, etc.) will be moved to its own file under `services/workflows/agents/`. The structure will be similar to the example provided earlier, with imports updated to reflect the new paths. For instance, `core_context.py` would look like:
+
+```python
+# services/workflows/agents/core_context.py
+from typing import Any, Dict, Optional
+
+from langchain.prompts import PromptTemplate
+from langchain_core.messages import HumanMessage
+
+from backend.models import AgentOutput # Move AgentOutput to backend/models.py
+from services.workflows.capability_mixins import BaseCapabilityMixin
+from services.workflows.utils.state_reducers import update_state_with_agent_result
+from services.workflows.utils.token_usage import TokenUsageMixin
+from services.workflows.vector_mixin import VectorRetrievalCapability
+from lib.logger import configure_logger
+
+logger = configure_logger(__name__)
+
+class CoreContextAgent(BaseCapabilityMixin, VectorRetrievalCapability, TokenUsageMixin):
+ """Core Context Agent evaluates proposals against DAO mission and standards."""
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
+ BaseCapabilityMixin.__init__(self, config=config, state_key="core_score")
+ VectorRetrievalCapability.__init__(self)
+ TokenUsageMixin.__init__(self)
+ self.initialize()
+ self._initialize_vector_capability()
+
+ def _initialize_vector_capability(self):
+ if not hasattr(self, "retrieve_from_vector_store"):
+ self.retrieve_from_vector_store = (
+ VectorRetrievalCapability.retrieve_from_vector_store.__get__(
+ self, self.__class__
+ )
+ )
+ self.logger.info("Initialized vector retrieval capability for CoreContextAgent")
+
+ async def process(self, state: Dict[str, Any]) -> Dict[str, Any]:
+ self._initialize_vector_capability()
+ proposal_id = state.get("proposal_id", "unknown")
+ proposal_content = state.get("proposal_data", "")
+
+ dao_mission_text = self.config.get("dao_mission", "")
+ if not dao_mission_text:
+ try:
+ self.logger.debug(f"[DEBUG:CoreAgent:{proposal_id}] Attempting to retrieve DAO mission")
+ dao_mission = await self.retrieve_from_vector_store(
+ query="DAO mission statement and values",
+ collection_name=self.config.get("mission_collection", "dao_documents"),
+ limit=3,
+ )
+ dao_mission_text = "\n".join([doc.page_content for doc in dao_mission])
+ except Exception as e:
+ self.logger.error(f"[DEBUG:CoreAgent:{proposal_id}] Error retrieving DAO mission: {str(e)}")
+ dao_mission_text = "Elevate human potential through AI on Bitcoin"
+
+ prompt = PromptTemplate(
+ input_variables=["proposal_data", "dao_mission"],
+ template="""Evaluate the proposal against the DAO's mission and values...
+ # (Rest of the prompt as in original file)
+ """
+ )
+
+ try:
+ formatted_prompt_text = prompt.format(
+ proposal_data=proposal_content,
+ dao_mission=dao_mission_text or "Elevate human potential through AI on Bitcoin",
+ )
+ message_content_list = [{"type": "text", "text": formatted_prompt_text}]
+ proposal_images = state.get("proposal_images", [])
+ if proposal_images:
+ message_content_list.extend(proposal_images)
+
+ llm_input_message = HumanMessage(content=message_content_list)
+ result = await self.llm.with_structured_output(AgentOutput).ainvoke([llm_input_message])
+ result_dict = result.model_dump()
+
+ token_usage_data = self.track_token_usage(formatted_prompt_text, result)
+ state["token_usage"]["core_agent"] = token_usage_data
+ result_dict["token_usage"] = token_usage_data
+
+ update_state_with_agent_result(state, result_dict, "core")
+ return result_dict
+ except Exception as e:
+ self.logger.error(f"[DEBUG:CoreAgent:{proposal_id}] Error in core evaluation: {str(e)}")
+ return {
+ "score": 50,
+ "flags": [f"Error: {str(e)}"],
+ "summary": "Evaluation failed due to error",
+ }
+```
+
+**Notes**:
+- **Imports**: Updated to use `backend.models.AgentOutput`, `services.workflows.utils.state_reducers`, and `services.workflows.utils.token_usage`.
+- **TokenUsageMixin**: Handles token usage tracking (see below).
+- **Image Handling**: Relies on `state["proposal_images"]` set by `ImageProcessingNode`.
+
+Other agent files (`historical_context.py`, etc.) follow a similar pattern, with their respective prompts and logic.
+
+#### 2. Create `services/workflows/utils/token_usage.py`
+To abstract the repeated token usage tracking logic, create a `TokenUsageMixin`:
+
+```python
+# services/workflows/utils/token_usage.py
+from typing import Any, Dict
+from lib.utils import calculate_token_cost
+
+class TokenUsageMixin:
+ """Mixin for tracking token usage in LLM calls."""
+
+ def track_token_usage(self, prompt_text: str, result: Any) -> Dict[str, int]:
+ """Track token usage for an LLM invocation."""
+ token_usage_data = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+
+ # Try to extract token usage from LLM
+ if (
+ hasattr(self.llm, "_last_prompt_id")
+ and hasattr(self.llm, "client")
+ and hasattr(self.llm.client, "usage_by_prompt_id")
+ ):
+ last_prompt_id = self.llm._last_prompt_id
+ if last_prompt_id in self.llm.client.usage_by_prompt_id:
+ usage = self.llm.client.usage_by_prompt_id[last_prompt_id]
+ token_usage_data = {
+ "input_tokens": usage.get("prompt_tokens", 0),
+ "output_tokens": usage.get("completion_tokens", 0),
+ "total_tokens": usage.get("total_tokens", 0),
+ }
+ return token_usage_data
+
+ # Fallback to estimation
+ llm_model_name = getattr(self.llm, "model_name", "gpt-4.1")
+ token_count = len(prompt_text) // 4 # Simple estimation
+ token_usage_dict = {"input_tokens": token_count}
+ cost_result = calculate_token_cost(token_usage_dict, llm_model_name)
+ token_usage_data = {
+ "input_tokens": token_count,
+ "output_tokens": len(result.model_dump_json()) // 4,
+ "total_tokens": token_count + len(result.model_dump_json()) // 4,
+ "model_name": llm_model_name,
+ }
+ return token_usage_data
+```
+
+This mixin is used by all agents to standardize token usage tracking.
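+
+For illustration, here is a minimal sketch of how an agent might compose the mixin. `ExampleAgent`, `ExampleOutput`, and the model name are illustrative placeholders, not part of the codebase; the sketch assumes the agent exposes an `llm` attribute, as the real agents do after `BaseCapabilityMixin.initialize()`.
+
+```python
+# Hypothetical agent composing TokenUsageMixin (illustrative only).
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel, Field
+
+from services.workflows.utils.token_usage import TokenUsageMixin
+
+
+class ExampleOutput(BaseModel):
+    score: int = Field(description="Score from 0-100")
+    summary: str = Field(description="Summary of findings")
+
+
+class ExampleAgent(TokenUsageMixin):
+    def __init__(self) -> None:
+        super().__init__()
+        # Model name is an assumption; the real agents obtain their LLM
+        # through BaseCapabilityMixin.initialize().
+        self.llm = ChatOpenAI(model="gpt-4.1")
+
+    async def evaluate(self, prompt_text: str) -> dict:
+        result = await self.llm.with_structured_output(ExampleOutput).ainvoke(
+            prompt_text
+        )
+        # Falls back to a rough length-based estimate when the client
+        # does not expose per-prompt usage data.
+        usage = self.track_token_usage(prompt_text, result)
+        return {"result": result.model_dump(), "token_usage": usage}
+```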
+
+#### 3. Move State Reducers to `services/workflows/utils/state_reducers.py`
+Extract state management utilities:
+
+```python
+# services/workflows/utils/state_reducers.py
+from typing import Any, Dict, List, Optional
+from lib.logger import configure_logger
+
+logger = configure_logger(__name__)
+
+def no_update_reducer(current: Any, new: List[Any]) -> Any:
+ """Reducer that prevents updates after initial value is set."""
+ is_initial_empty_string = isinstance(current, str) and current == ""
+ if current is not None and not is_initial_empty_string:
+ return current
+ processed_new_values = new if isinstance(new, list) else [new]
+ for n_val in processed_new_values:
+ if n_val is not None:
+ return n_val
+ return current
+
+def merge_dicts(current: Optional[Dict], updates: List[Optional[Dict]]) -> Dict:
+ """Merge multiple dictionary updates into the current dictionary."""
+ if current is None:
+ current = {}
+ if updates is None:
+ return current
+ if isinstance(updates, list):
+ for update in updates:
+ if update and isinstance(update, dict):
+ current.update(update)
+ elif isinstance(updates, dict):
+ current.update(updates)
+ return current
+
+def set_once(current: Any, updates: List[Any]) -> Any:
+ """Set the value once and prevent further updates."""
+ if current is not None:
+ return current
+ if updates is None:
+ return None
+ if isinstance(updates, list):
+ for update in updates:
+ if update is not None:
+ return update
+ elif updates is not None:
+ return updates
+ return current
+
+def update_state_with_agent_result(
+ state: Dict[str, Any], agent_result: Dict[str, Any], agent_name: str
+) -> Dict[str, Any]:
+ """Update state with agent result including summaries and flags."""
+ logger.debug(f"[DEBUG:update_state:{agent_name}] Updating state with {agent_name}_score")
+ if agent_name in ["core", "historical", "financial", "social", "final"]:
+ score_dict = dict(agent_result)
+ if "token_usage" in score_dict:
+ del score_dict["token_usage"]
+ state[f"{agent_name}_score"] = score_dict
+
+ if "summaries" not in state:
+ state["summaries"] = {}
+ if "summary" in agent_result and agent_result["summary"]:
+ state["summaries"][f"{agent_name}_score"] = agent_result["summary"]
+
+ if "flags" not in state:
+ state["flags"] = []
+ if "flags" in agent_result and isinstance(agent_result["flags"], list):
+ state["flags"].extend(agent_result["flags"])
+
+ return state
+```
+
+This centralizes state management logic, making it reusable across workflows.
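+
+To make the reducer semantics concrete, a small illustrative snippet follows (the values are made up; it assumes the module layout proposed above):
+
+```python
+# Illustrative reducer behavior (values are made up).
+from services.workflows.utils.state_reducers import (
+    merge_dicts,
+    no_update_reducer,
+    set_once,
+    update_state_with_agent_result,
+)
+
+# no_update_reducer keeps the first real value and ignores later updates.
+assert no_update_reducer("prop-123", ["prop-456"]) == "prop-123"
+assert no_update_reducer("", ["prop-456"]) == "prop-456"
+
+# set_once accepts the first non-None update, then freezes the value.
+assert set_once(None, [{"score": 80}]) == {"score": 80}
+assert set_once({"score": 80}, [{"score": 10}]) == {"score": 80}
+
+# merge_dicts folds successive updates into a single dictionary.
+assert merge_dicts({"core_agent": 1}, [{"financial_agent": 2}]) == {
+    "core_agent": 1,
+    "financial_agent": 2,
+}
+
+# update_state_with_agent_result copies score, summary, and flags into state.
+state = {"summaries": {}, "flags": []}
+update_state_with_agent_result(
+    state, {"score": 72, "summary": "Looks solid", "flags": []}, "core"
+)
+assert state["core_score"]["score"] == 72
+assert state["summaries"]["core_score"] == "Looks solid"
+```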
+
+#### 4. Move Image Processing to `services/workflows/agents/image_processing.py`
+Move `ImageProcessingNode` to its own file:
+
+```python
+# services/workflows/agents/image_processing.py
+import base64
+from typing import Any, Dict, List, Optional
+
+import httpx
+from services.workflows.capability_mixins import BaseCapabilityMixin
+from lib.logger import configure_logger
+from lib.utils import extract_image_urls
+
+logger = configure_logger(__name__)
+
+class ImageProcessingNode(BaseCapabilityMixin):
+ """Workflow node to process proposal images: extract URLs, download, and base64 encode."""
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
+ super().__init__(config=config, state_key="proposal_images")
+ self.initialize()
+
+ async def process(self, state: Dict[str, Any]) -> List[Dict[str, Any]]:
+ proposal_id = state.get("proposal_id", "unknown")
+ proposal_data_str = state.get("proposal_data", "")
+
+ if not proposal_data_str:
+ self.logger.info(f"[ImageProcessorNode:{proposal_id}] No proposal_data, skipping.")
+ return []
+
+ self.logger.info(f"[ImageProcessorNode:{proposal_id}] Starting image processing.")
+ image_urls = extract_image_urls(proposal_data_str)
+
+ if not image_urls:
+ self.logger.info(f"[ImageProcessorNode:{proposal_id}] No image URLs found.")
+ return []
+
+ processed_images = []
+ async with httpx.AsyncClient() as client:
+ for url in image_urls:
+ try:
+ response = await client.get(url, timeout=10.0)
+ response.raise_for_status()
+ image_data = base64.b64encode(response.content).decode("utf-8")
+ mime_type = "image/jpeg"
+ if url.lower().endswith((".jpg", ".jpeg")):
+ mime_type = "image/jpeg"
+ elif url.lower().endswith(".png"):
+ mime_type = "image/png"
+ elif url.lower().endswith(".gif"):
+ mime_type = "image/gif"
+ elif url.lower().endswith(".webp"):
+ mime_type = "image/webp"
+
+ processed_images.append({
+ "type": "image_url",
+ "image_url": {"url": f"data:{mime_type};base64,{image_data}"},
+ })
+ except Exception as e:
+ self.logger.error(f"[ImageProcessorNode:{proposal_id}] Error for {url}: {str(e)}")
+ return processed_images
+```
+
+This isolates image processing, which is reused by all agents.
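+
+A hedged usage sketch of running the node on its own (the URL and IDs are placeholders; in the workflow the node runs as the entry point, and constructing it standalone assumes `BaseCapabilityMixin.initialize()` needs no extra setup):
+
+```python
+# Illustrative standalone run of ImageProcessingNode (URL/IDs are placeholders).
+import asyncio
+
+from services.workflows.agents.image_processing import ImageProcessingNode
+
+
+async def main() -> None:
+    node = ImageProcessingNode(config={})
+    state = {
+        "proposal_id": "prop-123",
+        "proposal_data": "Airdrop details: https://example.com/banner.png",
+    }
+    images = await node.process(state)
+    # Each entry is shaped for a HumanMessage content list, e.g.
+    # {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
+    print(f"Processed {len(images)} image(s)")
+
+
+asyncio.run(main())
+```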
+
+#### 5. Update `services/workflows/proposal_evaluation.py`
+Slim down the file to focus on workflow orchestration and top-level functions:
+
+```python
+# services/workflows/proposal_evaluation.py
+from typing import Any, Dict, List, Optional
+
+from backend.factory import backend
+from backend.models import Profile, UUID
+from services.workflows.agents.core_context import CoreContextAgent
+from services.workflows.agents.financial_context import FinancialContextAgent
+from services.workflows.agents.historical_context import HistoricalContextAgent
+from services.workflows.agents.image_processing import ImageProcessingNode
+from services.workflows.agents.reasoning import ReasoningAgent
+from services.workflows.agents.social_context import SocialContextAgent
+from services.workflows.base import BaseWorkflow
+from services.workflows.hierarchical_workflows import HierarchicalTeamWorkflow
+from services.workflows.utils.state_reducers import update_state_with_agent_result
+from tools.dao_ext_action_proposals import VoteOnActionProposalTool
+from tools.tools_factory import filter_tools_by_names, initialize_tools
+from lib.logger import configure_logger
+
+logger = configure_logger(__name__)
+
+class ProposalEvaluationState:
+ # Move to backend/models.py or services/workflows/models.py
+ pass
+
+class ProposalEvaluationOutput:
+ # Move to backend/models.py or services/workflows/models.py
+ pass
+
+class ProposalEvaluationWorkflow(BaseWorkflow[ProposalEvaluationState]):
+ """Main workflow for evaluating DAO proposals using a hierarchical team."""
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
+ super().__init__()
+ self.config = config or {}
+ self.hierarchical_workflow = HierarchicalTeamWorkflow(
+ name="ProposalEvaluation",
+ config={
+ "state_type": ProposalEvaluationState,
+ "recursion_limit": self.config.get("recursion_limit", 20),
+ },
+ )
+
+ image_processor_agent = ImageProcessingNode(config=self.config)
+ core_agent = CoreContextAgent(self.config)
+ historical_agent = HistoricalContextAgent(self.config)
+ financial_agent = FinancialContextAgent(self.config)
+ social_agent = SocialContextAgent(self.config)
+ reasoning_agent = ReasoningAgent(self.config)
+
+ self.hierarchical_workflow.add_sub_workflow("image_processor", image_processor_agent)
+ self.hierarchical_workflow.add_sub_workflow("core_agent", core_agent)
+ self.hierarchical_workflow.add_sub_workflow("historical_agent", historical_agent)
+ self.hierarchical_workflow.add_sub_workflow("financial_agent", financial_agent)
+ self.hierarchical_workflow.add_sub_workflow("social_agent", social_agent)
+ self.hierarchical_workflow.add_sub_workflow("reasoning_agent", reasoning_agent)
+
+ self.hierarchical_workflow.set_entry_point("image_processor")
+ self.hierarchical_workflow.set_supervisor_logic(self._supervisor_logic)
+ self.hierarchical_workflow.set_halt_condition(self._halt_condition)
+ self.required_fields = ["proposal_id", "proposal_data"]
+
+ def _supervisor_logic(self, state: ProposalEvaluationState) -> str | List[str]:
+ # (Supervisor logic as in original file)
+ pass
+
+ def _halt_condition(self, state: ProposalEvaluationState) -> bool:
+ # (Halt condition logic as in original file)
+ pass
+
+ def _create_prompt(self):
+ # (Prompt creation as in original file)
+ pass
+
+ def _create_graph(self):
+ return self.hierarchical_workflow.build_graph()
+
+ def _validate_state(self, state: ProposalEvaluationState) -> bool:
+ # (State validation as in original file)
+ pass
+
+async def evaluate_proposal(proposal_id: str, proposal_data: str, config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+ # (evaluate_proposal function as in original file)
+ pass
+
+def get_proposal_evaluation_tools(profile: Optional[Profile] = None, agent_id: Optional[UUID] = None):
+ # (get_proposal_evaluation_tools function as in original file)
+ pass
+
+async def evaluate_and_vote_on_proposal(
+ proposal_id: UUID, wallet_id: Optional[UUID] = None, agent_id: Optional[UUID] = None,
+ auto_vote: bool = True, confidence_threshold: float = 0.7, dao_id: Optional[UUID] = None,
+ debug_level: int = 0
+) -> Dict:
+ # (evaluate_and_vote_on_proposal function as in original file)
+ pass
+
+async def evaluate_proposal_only(
+ proposal_id: UUID, wallet_id: Optional[UUID] = None, agent_id: Optional[UUID] = None,
+ dao_id: Optional[UUID] = None
+) -> Dict:
+ # (evaluate_proposal_only function as in original file)
+ pass
+```
+
+**Notes**:
+- **Slimmed Down**: Only includes workflow orchestration and top-level functions.
+- **Agent Imports**: Updated to use `services.workflows.agents.*`.
+- **Models**: Assumes `ProposalEvaluationState`, etc., are moved to `backend/models.py`.
+
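+With the module slimmed down, callers only touch the top-level functions. A hedged sketch of the intended call pattern (the UUID is a placeholder, and the sketch assumes the backend configuration these functions already rely on is available):
+
+```python
+# Illustrative caller for the slimmed-down module (the UUID is a placeholder).
+import asyncio
+from uuid import UUID
+
+from services.workflows.proposal_evaluation import (
+    evaluate_and_vote_on_proposal,
+    evaluate_proposal_only,
+)
+
+
+async def main() -> None:
+    proposal_id = UUID("00000000-0000-0000-0000-000000000000")
+
+    # Evaluation only, no on-chain vote.
+    evaluation = await evaluate_proposal_only(proposal_id=proposal_id)
+    print(evaluation)
+
+    # Evaluate and auto-vote once confidence clears the threshold.
+    result = await evaluate_and_vote_on_proposal(
+        proposal_id=proposal_id,
+        auto_vote=True,
+        confidence_threshold=0.7,
+    )
+    print(result)
+
+
+asyncio.run(main())
+```
+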
+#### 6. Move Models to `backend/models.py`
+Since `backend/models.py` already exists, append the Pydantic models and TypedDict:
+
+```python
+# backend/models.py
+import operator
+from typing import Annotated, Any, Dict, List, Optional, TypedDict
+
+from pydantic import BaseModel, Field
+
+# append_list_fn is assumed to live alongside the other reducers in
+# services/workflows/utils/state_reducers.py (not shown above).
+from services.workflows.utils.state_reducers import (
+    append_list_fn,
+    merge_dicts,
+    no_update_reducer,
+    set_once,
+)
+
+# Existing models (UUID, ExtensionFilter, etc.)
+# ...
+
+class ProposalEvaluationOutput(BaseModel):
+ approve: bool = Field(description="Decision: true to approve, false to reject")
+ confidence_score: float = Field(description="Confidence score (0.0-1.0)")
+ reasoning: str = Field(description="Reasoning behind the evaluation decision")
+
+class AgentOutput(BaseModel):
+ score: int = Field(description="Score from 0-100")
+ flags: List[str] = Field(description="Critical issues flagged")
+ summary: str = Field(description="Summary of findings")
+
+class FinalOutput(BaseModel):
+ score: int = Field(description="Final evaluation score")
+ decision: str = Field(description="Approve or Reject")
+ explanation: str = Field(description="Reasoning for decision")
+
+class ProposalEvaluationState(TypedDict):
+ proposal_id: Annotated[str, no_update_reducer]
+ proposal_data: Annotated[str, no_update_reducer]
+ core_score: Annotated[Optional[Dict[str, Any]], set_once]
+ historical_score: Annotated[Optional[Dict[str, Any]], set_once]
+ financial_score: Annotated[Optional[Dict[str, Any]], set_once]
+ social_score: Annotated[Optional[Dict[str, Any]], set_once]
+ final_score: Annotated[Optional[Dict[str, Any]], set_once]
+ flags: Annotated[List[str], append_list_fn]
+ summaries: Annotated[Dict[str, str], merge_dicts]
+ decision: Annotated[Optional[str], set_once]
+ halt: Annotated[bool, operator.or_]
+ token_usage: Annotated[Dict[str, Dict[str, int]], merge_dicts]
+ core_agent_invocations: Annotated[int, operator.add]
+ proposal_images: Annotated[Optional[List[Dict]], set_once]
+```
+
+Alternatively, create `services/workflows/models.py` if you prefer to keep workflow-specific models separate.
+
+---
+
+### Additional Considerations
+1. **Existing `lib/utils.py`**: The `extract_image_urls` and `calculate_token_cost` functions are already in `lib/utils.py`. Ensure `services/workflows/agents/image_processing.py` imports `extract_image_urls` correctly.
+2. **Logging**: The `lib/logger.py` module is used for `configure_logger`. Consider adding a debug level configuration in `config.py` to control verbosity dynamically.
+3. **Tool Integration**: The `get_proposal_evaluation_tools` function uses `tools/tools_factory.py`, which is correctly placed. No changes needed here.
+4. **Documentation**: Update `docs/workflows.md` to reflect the new structure, detailing the `services/workflows/agents/` and `services/workflows/utils/` directories.
+5. **Testing**: Ensure the `examples/proposal_evaluation_example.py` script is updated to use the new import paths (e.g., `from services.workflows.proposal_evaluation import evaluate_proposal`).
+
+---
+
+### Example Workflow
+To illustrate how the reorganized code works together, here’s how `ProposalEvaluationWorkflow` in `proposal_evaluation.py` integrates the agents:
+
+```python
+# services/workflows/proposal_evaluation.py (snippet)
+from services.workflows.agents.core_context import CoreContextAgent
+from services.workflows.agents.image_processing import ImageProcessingNode
+# ... other imports
+
+class ProposalEvaluationWorkflow(BaseWorkflow[ProposalEvaluationState]):
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
+ super().__init__()
+ self.config = config or {}
+ self.hierarchical_workflow = HierarchicalTeamWorkflow(
+ name="ProposalEvaluation",
+ config={"state_type": ProposalEvaluationState, "recursion_limit": 20},
+ )
+
+ # Add agents
+ self.hierarchical_workflow.add_sub_workflow("image_processor", ImageProcessingNode(self.config))
+ self.hierarchical_workflow.add_sub_workflow("core_agent", CoreContextAgent(self.config))
+ # ... other agents
+```
+
+The `ImageProcessingNode` processes images first, setting `state["proposal_images"]`, which agents like `CoreContextAgent` then use.
+
+---
+
+### Benefits
+- **Modularity**: Each agent and utility is in its own file, making it easier to maintain and test.
+- **Reusability**: `TokenUsageMixin` and state reducers are reusable across workflows.
+- **Clarity**: `proposal_evaluation.py` is focused on orchestration, reducing cognitive load.
+- **Scalability**: New agents can be added to `services/workflows/agents/` without modifying the main workflow file.
+
+---
+
+### Next Steps
+- **Implementation**: Start by creating `services/workflows/agents/` and `services/workflows/utils/` directories, then move the agent classes and utilities as outlined.
+- **Testing**: Run the `examples/proposal_evaluation_example.py` script to ensure all imports and functionality work.
+- **Open Decisions**: Decide whether agents should live under `services/agents/` instead of `services/workflows/agents/`, and whether adding new directories is acceptable; the plan can be adjusted accordingly.
+- **Further Refinement**: Flesh out the remaining files (e.g., `historical_context.py` or `state_reducers.py`) following the patterns shown above.
\ No newline at end of file
diff --git a/services/workflows/README.md b/services/workflows/README.md
new file mode 100644
index 00000000..fb0755de
--- /dev/null
+++ b/services/workflows/README.md
@@ -0,0 +1,66 @@
+# Workflows Module
+
+This module contains workflow implementations for various AI agent tasks. The primary focus is on providing structured, composable workflows that can coordinate multiple specialized agents.
+
+## Directory Structure
+
+```
+services/workflows/
+├── agents/ # Specialized agent implementations
+│ ├── core_context.py # Evaluates proposals against DAO mission and values
+│ ├── financial_context.py # Analyzes financial aspects of proposals
+│ ├── historical_context.py # Evaluates proposals against historical context
+│ ├── image_processing.py # Processes images in proposals
+│ ├── reasoning.py # Makes final decisions based on other agents' input
+│ └── social_context.py # Evaluates social/community aspects of proposals
+│
+├── utils/ # Shared utilities for workflow support
+│ ├── models.py # Shared Pydantic models
+│ ├── state_reducers.py # State management utilities
+│ └── token_usage.py # Token usage tracking utilities
+│
+├── base.py # Base workflow infrastructure
+├── capability_mixins.py # Capability mixins for agent extensions
+├── hierarchical_workflows.py # Hierarchical team workflow infrastructure
+├── planning_mixin.py # Planning capabilities
+├── proposal_evaluation.py # Proposal evaluation workflow
+├── vector_mixin.py # Vector retrieval capabilities
+└── web_search_mixin.py # Web search capabilities
+```
+
+## Main Workflows
+
+### Proposal Evaluation Workflow
+
+The `ProposalEvaluationWorkflow` in `proposal_evaluation.py` is a hierarchical workflow that uses multiple specialized agents to evaluate a DAO proposal. The workflow:
+
+1. Processes any images in the proposal
+2. Evaluates the proposal against the DAO's mission and values (core context)
+3. Evaluates the proposal against historical precedents
+4. Analyzes the financial aspects of the proposal
+5. Evaluates the social context and community impacts
+6. Makes a final decision combining all evaluations
+
+API functions (see the usage sketch below):
+- `evaluate_proposal(proposal_id, proposal_data, config)`: Evaluates a proposal
+- `evaluate_and_vote_on_proposal(proposal_id, ...)`: Evaluates and automatically votes on a proposal
+- `evaluate_proposal_only(proposal_id, ...)`: Evaluates a proposal without voting
+
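+A minimal usage sketch (the identifiers are placeholders; the exact shape of the returned dictionary depends on the workflow configuration):
+
+```python
+import asyncio
+
+from services.workflows.proposal_evaluation import evaluate_proposal
+
+
+async def main() -> None:
+    result = await evaluate_proposal(
+        proposal_id="prop-123",  # placeholder identifier
+        proposal_data="Proposal text describing the requested action...",
+    )
+    print(result)
+
+
+asyncio.run(main())
+```
+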
+## Agents
+
+Each agent in the `agents/` directory specializes in a specific aspect of proposal evaluation:
+
+- `CoreContextAgent`: Evaluates alignment with DAO mission and values
+- `HistoricalContextAgent`: Evaluates against past proposals and decisions
+- `FinancialContextAgent`: Analyzes budget, costs, and financial impact
+- `SocialContextAgent`: Evaluates community impact and social context
+- `ReasoningAgent`: Makes the final decision based on all evaluations
+- `ImageProcessingNode`: Handles image extraction and processing
+
+## Utilities
+
+The `utils/` directory contains shared utilities:
+
+- `state_reducers.py`: Contains functions for managing state in workflows
+- `token_usage.py`: Provides the `TokenUsageMixin` for tracking LLM token usage
+- `models.py`: Contains shared Pydantic models like `AgentOutput` and `FinalOutput`
\ No newline at end of file
diff --git a/services/workflows/agents/__init__.py b/services/workflows/agents/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/services/workflows/agents/core_context.py b/services/workflows/agents/core_context.py
new file mode 100644
index 00000000..bece9a93
--- /dev/null
+++ b/services/workflows/agents/core_context.py
@@ -0,0 +1,144 @@
+from typing import Any, Dict, Optional
+
+from langchain.prompts import PromptTemplate
+from langchain_core.messages import HumanMessage
+
+from lib.logger import configure_logger
+from services.workflows.capability_mixins import BaseCapabilityMixin
+from services.workflows.utils.models import AgentOutput
+from services.workflows.utils.state_reducers import update_state_with_agent_result
+from services.workflows.utils.token_usage import TokenUsageMixin
+from services.workflows.vector_mixin import VectorRetrievalCapability
+
+logger = configure_logger(__name__)
+
+
+class CoreContextAgent(BaseCapabilityMixin, VectorRetrievalCapability, TokenUsageMixin):
+ """Core Context Agent evaluates proposals against DAO mission and standards."""
+
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
+ """Initialize the Core Context Agent.
+
+ Args:
+ config: Optional configuration dictionary
+ """
+ BaseCapabilityMixin.__init__(self, config=config, state_key="core_score")
+ VectorRetrievalCapability.__init__(self)
+ TokenUsageMixin.__init__(self)
+ self.initialize()
+ self._initialize_vector_capability()
+
+ def _initialize_vector_capability(self):
+ """Initialize the vector retrieval capability if not already initialized."""
+ if not hasattr(self, "retrieve_from_vector_store"):
+ self.retrieve_from_vector_store = (
+ VectorRetrievalCapability.retrieve_from_vector_store.__get__(
+ self, self.__class__
+ )
+ )
+ self.logger.info(
+ "Initialized vector retrieval capability for CoreContextAgent"
+ )
+
+ async def process(self, state: Dict[str, Any]) -> Dict[str, Any]:
+ """Process the proposal against core DAO context.
+
+ Args:
+ state: The current workflow state
+
+ Returns:
+ Dictionary containing evaluation results
+ """
+ self._initialize_vector_capability()
+ proposal_id = state.get("proposal_id", "unknown")
+ proposal_content = state.get("proposal_data", "")
+
+ # Initialize token usage tracking in state if not present
+ if "token_usage" not in state:
+ state["token_usage"] = {}
+
+ # Retrieve or use provided DAO mission text
+ dao_mission_text = self.config.get("dao_mission", "")
+ if not dao_mission_text:
+ try:
+ self.logger.debug(
+ f"[DEBUG:CoreAgent:{proposal_id}] Attempting to retrieve DAO mission"
+ )
+ dao_mission = await self.retrieve_from_vector_store(
+ query="DAO mission statement and values",
+ collection_name=self.config.get(
+ "mission_collection", "dao_documents"
+ ),
+ limit=3,
+ )
+ dao_mission_text = "\n".join([doc.page_content for doc in dao_mission])
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:CoreAgent:{proposal_id}] Error retrieving DAO mission: {str(e)}"
+ )
+ dao_mission_text = "Elevate human potential through AI on Bitcoin"
+
+ prompt = PromptTemplate(
+ input_variables=["proposal_data", "dao_mission"],
+ template="""Evaluate the proposal against the DAO's mission and values.
+
+# Context
+You are evaluating a proposal for a DAO that focuses on: {dao_mission}
+
+# Proposal Data
+{proposal_data}
+
+# Task
+Score this proposal from 0-100 based on:
+1. Alignment with DAO mission (40%)
+2. Clarity of proposal (20%)
+3. Feasibility and practicality (20%)
+4. Community benefit (20%)
+
+# Output Format
+Provide:
+- Score (0-100)
+- List of any critical issues or red flags
+- Brief summary of your evaluation
+
+Only return a JSON object with these three fields: score, flags (array), and summary.""",
+ )
+
+ try:
+ formatted_prompt_text = prompt.format(
+ proposal_data=proposal_content,
+ dao_mission=dao_mission_text
+ or "Elevate human potential through AI on Bitcoin",
+ )
+ message_content_list = [{"type": "text", "text": formatted_prompt_text}]
+
+ # Add any proposal images to the message
+ proposal_images = state.get("proposal_images", [])
+ if proposal_images:
+ message_content_list.extend(proposal_images)
+
+ llm_input_message = HumanMessage(content=message_content_list)
+
+ # Get structured output from the LLM
+ result = await self.llm.with_structured_output(AgentOutput).ainvoke(
+ [llm_input_message]
+ )
+ result_dict = result.model_dump()
+
+ # Track token usage
+ token_usage_data = self.track_token_usage(formatted_prompt_text, result)
+ state["token_usage"]["core_agent"] = token_usage_data
+ result_dict["token_usage"] = token_usage_data
+
+ # Update state with agent result
+ update_state_with_agent_result(state, result_dict, "core")
+ return result_dict
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:CoreAgent:{proposal_id}] Error in core evaluation: {str(e)}"
+ )
+ return {
+ "score": 50,
+ "flags": [f"Error: {str(e)}"],
+ "summary": "Evaluation failed due to error",
+ }
diff --git a/services/workflows/agents/financial_context.py b/services/workflows/agents/financial_context.py
new file mode 100644
index 00000000..1302b589
--- /dev/null
+++ b/services/workflows/agents/financial_context.py
@@ -0,0 +1,130 @@
+from typing import Any, Dict, List, Optional
+
+from langchain.prompts import PromptTemplate
+from langchain_core.messages import HumanMessage
+from pydantic import BaseModel, Field
+
+from lib.logger import configure_logger
+from services.workflows.capability_mixins import BaseCapabilityMixin
+from services.workflows.utils.models import AgentOutput
+from services.workflows.utils.state_reducers import update_state_with_agent_result
+from services.workflows.utils.token_usage import TokenUsageMixin
+
+logger = configure_logger(__name__)
+
+
+class FinancialContextAgent(BaseCapabilityMixin, TokenUsageMixin):
+ """Financial Context Agent evaluates financial aspects of proposals."""
+
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
+ """Initialize the Financial Context Agent.
+
+ Args:
+ config: Optional configuration dictionary
+ """
+ BaseCapabilityMixin.__init__(self, config=config, state_key="financial_score")
+ TokenUsageMixin.__init__(self)
+ self.initialize()
+
+ async def process(self, state: Dict[str, Any]) -> Dict[str, Any]:
+ """Process the proposal's financial aspects.
+
+ Args:
+ state: The current workflow state
+
+ Returns:
+ Dictionary containing financial evaluation results
+ """
+ proposal_id = state.get("proposal_id", "unknown")
+ proposal_content = state.get("proposal_data", "")
+
+ # Initialize token usage tracking in state if not present
+ if "token_usage" not in state:
+ state["token_usage"] = {}
+
+ # Get DAO financial context from config if available
+ dao_financial_context = self.config.get("dao_financial_context", {})
+ treasury_balance = dao_financial_context.get("treasury_balance", "unknown")
+ monthly_budget = dao_financial_context.get("monthly_budget", "unknown")
+ funding_priorities = dao_financial_context.get("funding_priorities", [])
+ financial_constraints = dao_financial_context.get("financial_constraints", [])
+
+ # Format financial context for the prompt
+ financial_context_text = f"""
+Treasury Balance: {treasury_balance}
+Monthly Budget: {monthly_budget}
+Funding Priorities: {', '.join(funding_priorities) if funding_priorities else 'Not specified'}
+Financial Constraints: {', '.join(financial_constraints) if financial_constraints else 'Not specified'}
+"""
+
+ prompt = PromptTemplate(
+ input_variables=["proposal_data", "financial_context"],
+ template="""Evaluate the financial aspects of this proposal for the DAO.
+
+# Proposal
+{proposal_data}
+
+# DAO Financial Context
+{financial_context}
+
+# Task
+Score this proposal from 0-100 based on:
+1. Cost-effectiveness and value for money (40%)
+2. Budget accuracy and detail (20%)
+3. Financial risk assessment (20%)
+4. Alignment with DAO's financial priorities (20%)
+
+When analyzing, consider:
+- Is the proposal requesting a reasonable amount?
+- Are costs well-justified with clear deliverables?
+- Are there hidden or underestimated costs?
+- Does it align with the DAO's financial priorities?
+- What is the potential ROI (Return on Investment)?
+- Are there financial risks or dependencies?
+
+# Output Format
+Provide:
+- Score (0-100)
+- List of any critical financial issues or red flags
+- Brief summary of your financial evaluation
+
+Only return a JSON object with these three fields: score, flags (array), and summary.""",
+ )
+
+ try:
+ formatted_prompt_text = prompt.format(
+ proposal_data=proposal_content,
+ financial_context=financial_context_text,
+ )
+ message_content_list = [{"type": "text", "text": formatted_prompt_text}]
+
+ # Add any proposal images to the message
+ proposal_images = state.get("proposal_images", [])
+ if proposal_images:
+ message_content_list.extend(proposal_images)
+
+ llm_input_message = HumanMessage(content=message_content_list)
+
+ # Get structured output from the LLM
+ result = await self.llm.with_structured_output(AgentOutput).ainvoke(
+ [llm_input_message]
+ )
+ result_dict = result.model_dump()
+
+ # Track token usage
+ token_usage_data = self.track_token_usage(formatted_prompt_text, result)
+ state["token_usage"]["financial_agent"] = token_usage_data
+ result_dict["token_usage"] = token_usage_data
+
+ # Update state with agent result
+ update_state_with_agent_result(state, result_dict, "financial")
+ return result_dict
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:FinancialAgent:{proposal_id}] Error in financial evaluation: {str(e)}"
+ )
+ return {
+ "score": 50,
+ "flags": [f"Error: {str(e)}"],
+ "summary": "Financial evaluation failed due to error",
+ }
diff --git a/services/workflows/agents/historical_context.py b/services/workflows/agents/historical_context.py
new file mode 100644
index 00000000..70c34121
--- /dev/null
+++ b/services/workflows/agents/historical_context.py
@@ -0,0 +1,159 @@
+from typing import Any, Dict, List, Optional
+
+from langchain.prompts import PromptTemplate
+from langchain_core.messages import HumanMessage
+from pydantic import BaseModel, Field
+
+from lib.logger import configure_logger
+from services.workflows.capability_mixins import BaseCapabilityMixin
+from services.workflows.utils.models import AgentOutput
+from services.workflows.utils.state_reducers import update_state_with_agent_result
+from services.workflows.utils.token_usage import TokenUsageMixin
+from services.workflows.vector_mixin import VectorRetrievalCapability
+
+logger = configure_logger(__name__)
+
+
+class HistoricalContextAgent(
+ BaseCapabilityMixin, VectorRetrievalCapability, TokenUsageMixin
+):
+ """Historical Context Agent evaluates proposals against DAO historical context and past decisions."""
+
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
+ """Initialize the Historical Context Agent.
+
+ Args:
+ config: Optional configuration dictionary
+ """
+ BaseCapabilityMixin.__init__(self, config=config, state_key="historical_score")
+ VectorRetrievalCapability.__init__(self)
+ TokenUsageMixin.__init__(self)
+ self.initialize()
+ self._initialize_vector_capability()
+
+ def _initialize_vector_capability(self):
+ """Initialize the vector retrieval capability if not already initialized."""
+ if not hasattr(self, "retrieve_from_vector_store"):
+ self.retrieve_from_vector_store = (
+ VectorRetrievalCapability.retrieve_from_vector_store.__get__(
+ self, self.__class__
+ )
+ )
+ self.logger.info(
+ "Initialized vector retrieval capability for HistoricalContextAgent"
+ )
+
+ async def process(self, state: Dict[str, Any]) -> Dict[str, Any]:
+ """Process the proposal against historical context.
+
+ Args:
+ state: The current workflow state
+
+ Returns:
+ Dictionary containing evaluation results
+ """
+ self._initialize_vector_capability()
+ proposal_id = state.get("proposal_id", "unknown")
+ proposal_content = state.get("proposal_data", "")
+
+ # Initialize token usage tracking in state if not present
+ if "token_usage" not in state:
+ state["token_usage"] = {}
+
+ # Retrieve similar past proposals if possible
+ past_proposals_text = ""
+ try:
+ self.logger.debug(
+ f"[DEBUG:HistoricalAgent:{proposal_id}] Retrieving similar past proposals"
+ )
+ similar_proposals = await self.retrieve_from_vector_store(
+ query=proposal_content[
+ :1000
+ ], # Use first 1000 chars of proposal as query
+ collection_name=self.config.get(
+ "proposals_collection", "past_proposals"
+ ),
+ limit=3,
+ )
+ past_proposals_text = "\n\n".join(
+ [
+ f"Past Proposal {i+1}:\n{doc.page_content}"
+ for i, doc in enumerate(similar_proposals)
+ ]
+ )
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:HistoricalAgent:{proposal_id}] Error retrieving similar proposals: {str(e)}"
+ )
+ past_proposals_text = "No similar past proposals available."
+
+ prompt = PromptTemplate(
+ input_variables=["proposal_data", "past_proposals"],
+ template="""Evaluate this proposal in the context of the DAO's past decisions and similar proposals.
+
+# Current Proposal
+{proposal_data}
+
+# Similar Past Proposals
+{past_proposals}
+
+# Task
+Evaluate whether this proposal:
+1. Is a duplicate of past proposals (40%)
+2. Has addressed issues raised in similar past proposals (30%)
+3. Shows consistency with past approved proposals (30%)
+
+Score this proposal from 0-100 based on the criteria above.
+- 0-20: Exact duplicate or contradicts previous decisions
+- 21-50: Significant overlap without addressing past concerns
+- 51-70: Similar to past proposals but with improvements
+- 71-90: Builds well on past work with few concerns
+- 91-100: Unique proposal or excellent improvement on past proposals
+
+# Output Format
+Provide:
+- Score (0-100)
+- List of any critical issues or red flags
+- Brief summary of your evaluation
+
+Only return a JSON object with these three fields: score, flags (array), and summary.""",
+ )
+
+ try:
+ formatted_prompt_text = prompt.format(
+ proposal_data=proposal_content,
+ past_proposals=past_proposals_text
+ or "No past proposals available for comparison.",
+ )
+ message_content_list = [{"type": "text", "text": formatted_prompt_text}]
+
+ # Add any proposal images to the message
+ proposal_images = state.get("proposal_images", [])
+ if proposal_images:
+ message_content_list.extend(proposal_images)
+
+ llm_input_message = HumanMessage(content=message_content_list)
+
+ # Get structured output from the LLM
+ result = await self.llm.with_structured_output(AgentOutput).ainvoke(
+ [llm_input_message]
+ )
+ result_dict = result.model_dump()
+
+ # Track token usage
+ token_usage_data = self.track_token_usage(formatted_prompt_text, result)
+ state["token_usage"]["historical_agent"] = token_usage_data
+ result_dict["token_usage"] = token_usage_data
+
+ # Update state with agent result
+ update_state_with_agent_result(state, result_dict, "historical")
+ return result_dict
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:HistoricalAgent:{proposal_id}] Error in historical evaluation: {str(e)}"
+ )
+ return {
+ "score": 50,
+ "flags": [f"Error: {str(e)}"],
+ "summary": "Historical evaluation failed due to error",
+ }
diff --git a/services/workflows/agents/image_processing.py b/services/workflows/agents/image_processing.py
new file mode 100644
index 00000000..bd129e5b
--- /dev/null
+++ b/services/workflows/agents/image_processing.py
@@ -0,0 +1,95 @@
+import base64
+from typing import Any, Dict, List, Optional
+
+import httpx
+
+from lib.logger import configure_logger
+from lib.utils import extract_image_urls
+from services.workflows.capability_mixins import BaseCapabilityMixin
+
+logger = configure_logger(__name__)
+
+
+class ImageProcessingNode(BaseCapabilityMixin):
+ """Workflow node to process proposal images: extract URLs, download, and base64 encode."""
+
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
+ """Initialize the image processing node.
+
+ Args:
+ config: Optional configuration dictionary
+ """
+ super().__init__(config=config, state_key="proposal_images")
+ self.initialize()
+
+ async def process(self, state: Dict[str, Any]) -> List[Dict[str, Any]]:
+ """Process images in the proposal data.
+
+ Args:
+ state: The current workflow state
+
+ Returns:
+ List of dictionaries containing processed images in a format suitable for LLM
+ """
+ proposal_id = state.get("proposal_id", "unknown")
+ proposal_data_str = state.get("proposal_data", "")
+
+ if not proposal_data_str:
+ self.logger.info(
+ f"[ImageProcessorNode:{proposal_id}] No proposal_data, skipping."
+ )
+ # Return empty list to ensure state is updated
+ return []
+
+ self.logger.info(
+ f"[ImageProcessorNode:{proposal_id}] Starting image processing."
+ )
+ image_urls = extract_image_urls(proposal_data_str)
+
+ if not image_urls:
+ self.logger.info(f"[ImageProcessorNode:{proposal_id}] No image URLs found.")
+ # Return empty list explicitly to ensure state is updated
+ return []
+
+ processed_images = []
+ async with httpx.AsyncClient() as client:
+ for url in image_urls:
+ try:
+ self.logger.debug(
+ f"[ImageProcessorNode:{proposal_id}] Processing image URL: {url}"
+ )
+ response = await client.get(url, timeout=10.0)
+ response.raise_for_status()
+ image_data = base64.b64encode(response.content).decode("utf-8")
+
+ # Determine MIME type from URL extension
+ mime_type = "image/jpeg" # Default
+ if url.lower().endswith((".jpg", ".jpeg")):
+ mime_type = "image/jpeg"
+ elif url.lower().endswith(".png"):
+ mime_type = "image/png"
+ elif url.lower().endswith(".gif"):
+ mime_type = "image/gif"
+ elif url.lower().endswith(".webp"):
+ mime_type = "image/webp"
+
+ processed_images.append(
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:{mime_type};base64,{image_data}"
+ },
+ }
+ )
+ self.logger.debug(
+ f"[ImageProcessorNode:{proposal_id}] Successfully processed image: {url}"
+ )
+ except Exception as e:
+ self.logger.error(
+ f"[ImageProcessorNode:{proposal_id}] Error processing {url}: {str(e)}"
+ )
+
+ self.logger.info(
+ f"[ImageProcessorNode:{proposal_id}] Processed {len(processed_images)} images."
+ )
+ return processed_images
diff --git a/services/workflows/agents/reasoning.py b/services/workflows/agents/reasoning.py
new file mode 100644
index 00000000..30aa4289
--- /dev/null
+++ b/services/workflows/agents/reasoning.py
@@ -0,0 +1,297 @@
+import asyncio
+from typing import Any, Dict, List, Optional
+
+from langchain.prompts import PromptTemplate
+from langchain_core.messages import HumanMessage
+from langchain_openai import ChatOpenAI
+from langgraph.graph import StateGraph
+from pydantic import BaseModel, Field
+
+from lib.logger import configure_logger
+from services.workflows.capability_mixins import BaseCapabilityMixin
+from services.workflows.chat import StreamingCallbackHandler
+from services.workflows.planning_mixin import PlanningCapability
+from services.workflows.utils.models import FinalOutput
+from services.workflows.utils.state_reducers import update_state_with_agent_result
+from services.workflows.utils.token_usage import TokenUsageMixin
+
+logger = configure_logger(__name__)
+
+
+class ReasoningAgent(BaseCapabilityMixin, PlanningCapability, TokenUsageMixin):
+ """Reasoning Agent that makes the final evaluation decision based on other agents' inputs."""
+
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
+ """Initialize the Reasoning Agent.
+
+ Args:
+ config: Optional configuration dictionary
+ """
+ BaseCapabilityMixin.__init__(self, config=config, state_key="final_score")
+ TokenUsageMixin.__init__(self)
+
+ # Create a dummy queue for the StreamingCallbackHandler
+ self.dummy_queue = asyncio.Queue()
+ # Create callback handler and planning_llm for PlanningCapability
+ # These won't be used since we don't actually use the planning functionality
+ self.dummy_callback = StreamingCallbackHandler(queue=self.dummy_queue)
+ self.dummy_llm = ChatOpenAI()
+
+ # Pass the required arguments to PlanningCapability.__init__
+ PlanningCapability.__init__(
+ self, callback_handler=self.dummy_callback, planning_llm=self.dummy_llm
+ )
+
+ self.initialize()
+ self._initialize_planning_capability()
+
+ # Configuration for thresholds
+        self.default_threshold = self.config.get("approval_threshold", 70)
+        self.veto_threshold = self.config.get("veto_threshold", 30)
+        self.consensus_threshold = self.config.get("consensus_threshold", 10)
+        self.confidence_adjustment = self.config.get("confidence_adjustment", 0.15)
+
+ def _initialize_planning_capability(self):
+ """Initialize the planning capability if not already initialized."""
+ if not hasattr(self, "planning"):
+ # We don't actually use the planning method, just create a dummy placeholder
+ self.planning = lambda *args, **kwargs: None
+ self.logger.info("Initialized dummy planning capability for ReasoningAgent")
+
+ def integrate_with_graph(self, graph: StateGraph, **kwargs) -> None:
+ """Hook to integrate with a particular graph."""
+ pass
+
+ async def process(self, state: Dict[str, Any]) -> Dict[str, Any]:
+ """Process all agent scores and make a final decision.
+
+ Args:
+ state: The current workflow state with all agent results
+
+ Returns:
+ Dictionary containing the final evaluation decision
+ """
+ self._initialize_planning_capability()
+ proposal_id = state.get("proposal_id", "unknown")
+
+ # Add diagnostic logging
+ self.logger.info(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] Starting reasoning agent process"
+ )
+ self.logger.info(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] State keys: {list(state.keys())}"
+ )
+
+ # Initialize token usage tracking in state if not present
+ if "token_usage" not in state:
+ state["token_usage"] = {}
+
+ # Helper function to safely get scores
+ def safe_get_score(value, default=0):
+ if isinstance(value, dict) and "score" in value:
+ return value["score"]
+ return default
+
+ # Get individual scores
+ core_score = safe_get_score(state.get("core_score"), 0)
+ historical_score = safe_get_score(state.get("historical_score"), 0)
+ financial_score = safe_get_score(state.get("financial_score"), 0)
+ social_score = safe_get_score(state.get("social_score"), 0)
+
+ # Get agent summaries
+ core_summary = state.get("summaries", {}).get(
+ "core_score", "No core context evaluation available."
+ )
+ historical_summary = state.get("summaries", {}).get(
+ "historical_score", "No historical context evaluation available."
+ )
+ financial_summary = state.get("summaries", {}).get(
+ "financial_score", "No financial evaluation available."
+ )
+ social_summary = state.get("summaries", {}).get(
+ "social_score", "No social context evaluation available."
+ )
+
+ # Get flags
+ flags = state.get("flags", [])
+ flags_text = (
+ "\n".join([f"- {flag}" for flag in flags])
+ if flags
+ else "No flags identified."
+ )
+
+ # Calculate score statistics
+ scores = [
+ ("Core", core_score),
+ ("Historical", historical_score),
+ ("Financial", financial_score),
+ ("Social", social_score),
+ ]
+ valid_scores = [score for _, score in scores if score > 0]
+
+ if not valid_scores:
+ self.logger.error(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] No valid scores found!"
+ )
+ return {
+ "score": 0,
+ "decision": "Reject",
+ "explanation": "Unable to evaluate due to missing agent scores.",
+ "flags": ["Critical: No valid evaluation scores available."],
+ }
+
+ # Calculate metrics
+ avg_score = sum(valid_scores) / len(valid_scores)
+ min_score = min(valid_scores)
+ max_score = max(valid_scores)
+ score_range = max_score - min_score
+
+ # Detect if any agent has a veto-level score
+ has_veto = any(score <= self.veto_threshold for score in valid_scores)
+
+ # Check for consensus or disagreement
+ has_consensus = score_range <= self.consensus_threshold
+ has_disagreement = score_range >= 30
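+        # Example: scores of [72, 68, 75, 71] span a range of 7 (consensus),
+        # while [45, 80, 70, 65] span a range of 35 (treated as disagreement).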
+
+ # Format agent evaluations for prompt
+ agent_evaluations = f"""
+Core Context Evaluation:
+Score: {core_score}/100
+Summary: {core_summary}
+
+Historical Context Evaluation:
+Score: {historical_score}/100
+Summary: {historical_summary}
+
+Financial Evaluation:
+Score: {financial_score}/100
+Summary: {financial_summary}
+
+Social Context Evaluation:
+Score: {social_score}/100
+Summary: {social_summary}
+
+Flags Identified:
+{flags_text}
+
+Score Statistics:
+- Average Score: {avg_score:.2f}
+- Minimum Score: {min_score}
+- Maximum Score: {max_score}
+- Score Range: {score_range}
+"""
+
+ prompt = PromptTemplate(
+ input_variables=["agent_evaluations", "approval_threshold"],
+ template="""Analyze the specialized agent evaluations and make a final decision on this proposal.
+
+# Agent Evaluations
+{agent_evaluations}
+
+# Decision Guidelines
+- The default threshold for approval is {approval_threshold}/100
+- A proposal with any agent score below 30 should typically be rejected
+- A proposal with high consensus (small range between scores) increases confidence
+- A proposal with high disagreement (large range between scores) decreases confidence
+- Consider the reasoning behind each agent's score, not just the numerical value
+- Critical flags should be weighted heavily in your decision
+
+# Task
+1. Analyze the evaluations from all agents
+2. Consider the significance of any critical flags
+3. Weigh the relative importance of different evaluation dimensions
+4. Make a final decision (Approve or Reject) with a final score
+5. Provide clear reasoning for your decision
+
+# Output Format
+Your response should be a JSON object with:
+- score: A final score from 0-100
+- decision: Either "Approve" or "Reject"
+- explanation: Your reasoning for the decision
+
+Return only the JSON object with these three fields.""",
+ )
+
+ try:
+ formatted_prompt_text = prompt.format(
+ agent_evaluations=agent_evaluations,
+ approval_threshold=self.default_threshold,
+ )
+
+ llm_input_message = HumanMessage(content=formatted_prompt_text)
+
+ # Get structured output from the LLM
+ result = await self.llm.with_structured_output(FinalOutput).ainvoke(
+ [llm_input_message]
+ )
+ result_dict = result.model_dump()
+
+ # Track token usage
+ token_usage_data = self.track_token_usage(formatted_prompt_text, result)
+ state["token_usage"]["reasoning_agent"] = token_usage_data
+ result_dict["token_usage"] = token_usage_data
+
+ # Add calculated metrics to result for transparency
+ result_dict["metrics"] = {
+ "avg_score": avg_score,
+ "min_score": min_score,
+ "max_score": max_score,
+ "score_range": score_range,
+ "has_veto": has_veto,
+ "has_consensus": has_consensus,
+ "has_disagreement": has_disagreement,
+ }
+
+ # Calculate confidence based on consensus/disagreement
+ confidence = 0.7 # Base confidence
+ if has_consensus:
+ confidence += self.confidence_adjustment
+ if has_disagreement:
+ confidence -= self.confidence_adjustment
+ if has_veto:
+ confidence -= 0.3
+
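+        # Illustration: consensus alone gives 0.7 + 0.15 = 0.85; a veto-level score
+        # combined with high disagreement gives 0.7 - 0.15 - 0.3 = 0.25, before the
+        # clamp to [0.1, 1.0] below.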
+ result_dict["confidence"] = max(
+ 0.1, min(1.0, confidence)
+ ) # Clamp to [0.1, 1.0]
+
+ # Add flags to the result
+ result_dict["flags"] = flags
+
+ # Update state with agent result
+ update_state_with_agent_result(state, result_dict, "final")
+
+ # Add final diagnostic logging
+ self.logger.info(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] Successfully completed reasoning"
+ )
+ self.logger.info(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] Returning result with decision: {result_dict.get('decision')}"
+ )
+ self.logger.info(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] Updated state now has keys: {list(state.keys())}"
+ )
+ if "final_score" in state:
+ self.logger.info(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] final_score type: {type(state.get('final_score'))}"
+ )
+
+ return result_dict
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:ReasoningAgent:{proposal_id}] Error in reasoning: {str(e)}"
+ )
+ return {
+ "score": 50,
+ "decision": "Reject",
+ "explanation": f"Evaluation failed due to error: {str(e)}",
+ "flags": [f"Error: {str(e)}"],
+ }
diff --git a/services/workflows/agents/social_context.py b/services/workflows/agents/social_context.py
new file mode 100644
index 00000000..68e687e7
--- /dev/null
+++ b/services/workflows/agents/social_context.py
@@ -0,0 +1,209 @@
+from typing import Any, Dict, List, Optional
+
+from langchain.prompts import PromptTemplate
+from langchain_core.messages import HumanMessage
+from pydantic import BaseModel, Field
+
+from lib.logger import configure_logger
+from services.workflows.capability_mixins import BaseCapabilityMixin
+from services.workflows.utils.models import AgentOutput
+from services.workflows.utils.state_reducers import update_state_with_agent_result
+from services.workflows.utils.token_usage import TokenUsageMixin
+from services.workflows.web_search_mixin import WebSearchCapability
+
+logger = configure_logger(__name__)
+
+
+class SocialContextAgent(BaseCapabilityMixin, WebSearchCapability, TokenUsageMixin):
+ """Social Context Agent evaluates social and community aspects of proposals."""
+
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
+ """Initialize the Social Context Agent.
+
+ Args:
+ config: Optional configuration dictionary
+ """
+ BaseCapabilityMixin.__init__(self, config=config, state_key="social_score")
+ WebSearchCapability.__init__(self)
+ TokenUsageMixin.__init__(self)
+ self.initialize()
+ self._initialize_web_search_capability()
+
+ def _initialize_web_search_capability(self):
+ """Initialize the web search capability if not already initialized."""
+ if not hasattr(self, "web_search"):
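+            # WebSearchCapability.web_search is rebound to this instance via the
+            # descriptor protocol so that later calls receive `self` automatically.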
+ self.web_search = WebSearchCapability.web_search.__get__(
+ self, self.__class__
+ )
+ self.logger.info("Initialized web search capability for SocialContextAgent")
+
+ async def process(self, state: Dict[str, Any]) -> Dict[str, Any]:
+ """Process the proposal's social context.
+
+ Args:
+ state: The current workflow state
+
+ Returns:
+ Dictionary containing social evaluation results
+ """
+ self._initialize_web_search_capability()
+ proposal_id = state.get("proposal_id", "unknown")
+ proposal_content = state.get("proposal_data", "")
+
+ # Initialize token usage tracking in state if not present
+ if "token_usage" not in state:
+ state["token_usage"] = {}
+
+ # Extract key concepts for web search
+ search_results = []
+ try:
+ # First try to identify key search terms
+ key_concepts_prompt = PromptTemplate(
+ input_variables=["proposal"],
+ template="""Extract 2-3 key topics from this proposal that would benefit from external information:
+
+{proposal}
+
+Return only the key topics as a comma-separated list. Be specific and concise.
+""",
+ )
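+            # The model should reply with a short comma-separated topic list,
+            # e.g. "treasury spending, community grants" (illustrative output only).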
+
+ key_concepts_result = await self.llm.ainvoke(
+ key_concepts_prompt.format(proposal=proposal_content[:1500])
+ )
+
+ # Use these concepts for web search
+ key_concepts = key_concepts_result.content.strip()
+ self.logger.info(
+ f"[DEBUG:SocialAgent:{proposal_id}] Extracted key concepts: {key_concepts}"
+ )
+
+ if key_concepts:
+ dao_name = self.config.get("dao_name", "DAO")
+ search_query = (
+ f"{key_concepts} {dao_name} bitcoin community perspective"
+ )
+ self.logger.info(
+ f"[DEBUG:SocialAgent:{proposal_id}] Searching: {search_query}"
+ )
+
+                search_results, token_usage = await self.web_search(
+                    query=search_query,
+                    num_results=3,
+                )
+                # Record the web-search token usage alongside this agent's LLM usage
+                state["token_usage"]["social_web_search"] = token_usage
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:SocialAgent:{proposal_id}] Error in web search: {str(e)}"
+ )
+ search_results = []
+
+ # Format search results for inclusion in the prompt
+ search_results_text = ""
+ if search_results:
+ search_results_text = "Web search results relevant to this proposal:\n\n"
+ for i, doc in enumerate(search_results):
+ page_content = doc.get("page_content", "No content available")
+ source_urls = doc.get("metadata", {}).get("source_urls", [])
+
+ if source_urls:
+ for j, source in enumerate(source_urls):
+ search_results_text += (
+ f"Source {i+1}.{j+1}: {source.get('title', 'Unknown')}\n"
+ )
+ search_results_text += f"URL: {source.get('url', 'Unknown')}\n"
+
+ search_results_text += f"Summary: {page_content[:300]}...\n\n"
+ else:
+ search_results_text = "No relevant web search results available.\n"
+
+ # Get community info from config
+ community_context = self.config.get("community_context", {})
+ community_size = community_context.get("community_size", "Unknown")
+ active_members = community_context.get("active_members", "Unknown")
+ governance_participation = community_context.get(
+ "governance_participation", "Low"
+ )
+ recent_sentiment = community_context.get("recent_sentiment", "Neutral")
+
+ community_info = f"""
+Community Size: {community_size}
+Active Members: {active_members}
+Governance Participation: {governance_participation}
+Recent Community Sentiment: {recent_sentiment}
+"""
+
+ prompt = PromptTemplate(
+ input_variables=["proposal_data", "search_results", "community_info"],
+ template="""Evaluate the social impact and community aspects of this proposal.
+
+# Proposal
+{proposal_data}
+
+# Community Information
+{community_info}
+
+# External Context
+{search_results}
+
+# Task
+Score this proposal from 0-100 based on:
+1. Community benefit and inclusion (40%)
+2. Alignment with community values and interests (30%)
+3. Potential for community engagement (20%)
+4. Consideration of diverse stakeholders (10%)
+
+When analyzing, consider:
+- Will this proposal benefit the broader community or just a few members?
+- Is there likely community support or opposition?
+- Does it foster inclusivity and participation?
+- Does it align with the community's values and interests?
+- Could it cause controversy or division?
+- Does it consider the needs of diverse stakeholders?
+
+# Output Format
+Provide:
+- Score (0-100)
+- List of any critical social issues or red flags
+- Brief summary of your social evaluation
+
+Only return a JSON object with these three fields: score, flags (array), and summary.""",
+ )
+
+ try:
+ formatted_prompt_text = prompt.format(
+ proposal_data=proposal_content,
+ search_results=search_results_text,
+ community_info=community_info,
+ )
+ message_content_list = [{"type": "text", "text": formatted_prompt_text}]
+
+ # Add any proposal images to the message
+ proposal_images = state.get("proposal_images", [])
+ if proposal_images:
+ message_content_list.extend(proposal_images)
+
+ llm_input_message = HumanMessage(content=message_content_list)
+
+ # Get structured output from the LLM
+ result = await self.llm.with_structured_output(AgentOutput).ainvoke(
+ [llm_input_message]
+ )
+ result_dict = result.model_dump()
+
+ # Track token usage
+ token_usage_data = self.track_token_usage(formatted_prompt_text, result)
+ state["token_usage"]["social_agent"] = token_usage_data
+ result_dict["token_usage"] = token_usage_data
+
+ # Update state with agent result
+ update_state_with_agent_result(state, result_dict, "social")
+ return result_dict
+ except Exception as e:
+ self.logger.error(
+ f"[DEBUG:SocialAgent:{proposal_id}] Error in social evaluation: {str(e)}"
+ )
+ return {
+ "score": 50,
+ "flags": [f"Error: {str(e)}"],
+ "summary": "Social evaluation failed due to error",
+ }
diff --git a/services/workflows/base.py b/services/workflows/base.py
index 856e00fe..2b0d9cd2 100644
--- a/services/workflows/base.py
+++ b/services/workflows/base.py
@@ -179,55 +179,40 @@ def get_missing_fields(self, state: StateType) -> List[str]:
]
async def execute(self, initial_state: StateType) -> Dict:
- """Execute the workflow.
+ """Execute the workflow with the given initial state.
Args:
- initial_state: The initial state for the workflow
+ initial_state: Initial state for the workflow
Returns:
- The final state after execution
-
- Raises:
- ValidationError: If the initial state is invalid
- ExecutionError: If the workflow execution fails
+ Final state after workflow execution
"""
+ # Validate state
+ if not self._validate_state(initial_state):
+ error_message = f"Invalid initial state: {initial_state}"
+ self.logger.error(error_message)
+ missing = self.get_missing_fields(initial_state)
+ if missing:
+ error_message += f" Missing fields: {', '.join(missing)}"
+ raise ValidationError(error_message)
+
+ # Create runtime workflow
+ app = self._create_graph()
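+ # _create_graph() is expected to return an already-compiled graph that can be invoked directly via ainvoke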
+
+ self.logger.debug(
+ f"[DEBUG:Workflow:{self.__class__.__name__}] State before ain_invoke: {json.dumps(initial_state, indent=2, default=str)}"
+ )
try:
- # Validate state
- is_valid = self._validate_state(initial_state)
- if not is_valid:
- missing_fields = self.get_missing_fields(initial_state)
- error_msg = (
- f"Invalid initial state. Missing required fields: {missing_fields}"
- )
- self.logger.error(error_msg)
- raise ValidationError(error_msg, {"missing_fields": missing_fields})
-
- # Create and compile the graph
- graph = self._create_graph()
- if hasattr(graph, "compile"):
- app = graph.compile()
- else:
- # Graph is already compiled
- app = graph
-
# Execute the workflow
- self.logger.info(f"Executing workflow {self.__class__.__name__}")
- self.logger.debug(
- f"[DEBUG:Workflow:{self.__class__.__name__}] State before ain_invoke: {repr(initial_state)}"
- )
result = await app.ainvoke(initial_state)
self.logger.debug(
- f"[DEBUG:Workflow:{self.__class__.__name__}] State after ain_invoke: {repr(result)}"
+ f"[DEBUG:Workflow:{self.__class__.__name__}] State after ain_invoke: {json.dumps(result, indent=2, default=str)}"
)
- self.logger.info(f"Workflow {self.__class__.__name__} execution completed")
return result
-
- except ValidationError as e:
- # Re-raise validation errors
- raise e
except Exception as e:
- self.logger.error(f"Workflow execution failed: {str(e)}", exc_info=True)
- raise ExecutionError(f"Workflow execution failed: {str(e)}")
+ error_message = f"Workflow execution failed: {str(e)}"
+ self.logger.error(error_message)
+ raise ExecutionError(error_message) from e
class BaseWorkflowMixin(ABC):
diff --git a/services/workflows/proposal_evaluation.py b/services/workflows/proposal_evaluation.py
index 46d3ac98..a5f553ad 100644
--- a/services/workflows/proposal_evaluation.py
+++ b/services/workflows/proposal_evaluation.py
@@ -1,16 +1,11 @@
import asyncio
-import base64
import operator
import uuid
from typing import Annotated, Any, Dict, List, Optional, TypedDict, Union
-import httpx
from langchain.prompts import PromptTemplate
-from langchain_core.messages import HumanMessage
-from langchain_openai import ChatOpenAI
from langgraph.channels import LastValue
from langgraph.graph import END, Graph, StateGraph
-from pydantic import BaseModel, Field
from backend.factory import backend
from backend.models import (
@@ -19,112 +14,39 @@
Profile,
PromptFilter,
ProposalBase,
+ ProposalFilter,
ProposalType,
QueueMessageFilter,
QueueMessageType,
)
from lib.hiro import HiroApi
from lib.logger import configure_logger
-from lib.utils import (
- calculate_token_cost,
- extract_image_urls,
-)
-from services.workflows.base import (
- BaseWorkflow,
-)
-from services.workflows.capability_mixins import BaseCapabilityMixin
+from services.workflows.agents.core_context import CoreContextAgent
+from services.workflows.agents.financial_context import FinancialContextAgent
+from services.workflows.agents.historical_context import HistoricalContextAgent
+from services.workflows.agents.image_processing import ImageProcessingNode
+from services.workflows.agents.reasoning import ReasoningAgent
+from services.workflows.agents.social_context import SocialContextAgent
+from services.workflows.base import BaseWorkflow
from services.workflows.chat import ChatService, StreamingCallbackHandler
from services.workflows.hierarchical_workflows import (
HierarchicalTeamWorkflow,
append_list_fn,
merge_dict_fn,
)
-from services.workflows.planning_mixin import PlanningCapability
-from services.workflows.vector_mixin import VectorRetrievalCapability
-from services.workflows.web_search_mixin import WebSearchCapability
+from services.workflows.utils.models import FinalOutput, ProposalEvaluationOutput
+from services.workflows.utils.state_reducers import (
+ merge_dicts,
+ no_update_reducer,
+ set_once,
+ update_state_with_agent_result,
+)
from tools.dao_ext_action_proposals import VoteOnActionProposalTool
from tools.tools_factory import filter_tools_by_names, initialize_tools
logger = configure_logger(__name__)
-class ProposalEvaluationOutput(BaseModel):
- """Output model for proposal evaluation."""
-
- approve: bool = Field(
- description="Decision: true to approve (vote FOR), false to reject (vote AGAINST)"
- )
- confidence_score: float = Field(
- description="Confidence score for the decision (0.0-1.0)"
- )
- reasoning: str = Field(description="The reasoning behind the evaluation decision")
-
-
-def no_update_reducer(current: Any, new: List[Any]) -> Any:
- """Reducer that prevents updates after initial value is set."""
- # Treat initial empty string for str types as if it were None for accepting the first value
- is_initial_empty_string = isinstance(current, str) and current == ""
-
- # If current is genuinely set (not None and not initial empty string), keep it.
- if current is not None and not is_initial_empty_string:
- return current
-
- # Current is None or an initial empty string. Try to set it from new.
- processed_new_values = (
- new if isinstance(new, list) else [new]
- ) # Ensure 'new' is a list
- for n_val in processed_new_values:
- if n_val is not None:
- return n_val
-
- # If current was None/initial empty string and new is all None or empty, return current (which is None or '')
- return current
-
-
-def merge_dicts(current: Optional[Dict], updates: List[Optional[Dict]]) -> Dict:
- """Merge multiple dictionary updates into the current dictionary."""
- # Initialize current if it's None
- if current is None:
- current = {}
-
- # Handle case where updates is None
- if updates is None:
- return current
-
- # Process updates if it's a list
- if isinstance(updates, list):
- for update in updates:
- if update and isinstance(update, dict):
- current.update(update)
- # Handle case where updates is a single dictionary, not a list
- elif isinstance(updates, dict):
- current.update(updates)
-
- return current
-
-
-def set_once(current: Any, updates: List[Any]) -> Any:
- """Set the value once and prevent further updates."""
- # If current already has a value, return it unchanged
- if current is not None:
- return current
-
- # Handle case where updates is None instead of a list
- if updates is None:
- return None
-
- # Process updates if it's a list
- if isinstance(updates, list):
- for update in updates:
- if update is not None:
- return update
- # Handle case where updates is a single value, not a list
- elif updates is not None:
- return updates
-
- return current
-
-
class ProposalEvaluationState(TypedDict):
"""Type definition for the proposal evaluation state."""
@@ -146,1089 +68,15 @@ class ProposalEvaluationState(TypedDict):
proposal_images: Annotated[Optional[List[Dict]], set_once]
-class AgentOutput(BaseModel):
- """Output model for agent evaluations."""
-
- score: int = Field(description="Score from 0-100")
- flags: List[str] = Field(description="Critical issues flagged")
- summary: str = Field(description="Summary of findings")
-
-
-class FinalOutput(BaseModel):
- """Output model for the final evaluation decision."""
-
- score: int = Field(description="Final evaluation score")
- decision: str = Field(description="Approve or Reject")
- explanation: str = Field(description="Reasoning for decision")
-
-
-def update_state_with_agent_result(
- state: ProposalEvaluationState, agent_result: Dict[str, Any], agent_name: str
-):
- """Helper function to update state with agent result including summaries and flags."""
- # Simplified logging - just log once with relevant details
- logger.debug(
- f"[DEBUG:update_state:{agent_name}] Updating state with {agent_name}_score (score: {agent_result.get('score', 'N/A')})"
- )
-
- # Update agent score in state
- if agent_name in ["core", "historical", "financial", "social", "final"]:
- # Make a copy of agent_result to avoid modifying the original
- score_dict = dict(agent_result)
- # Don't pass token_usage through this path to avoid duplication
- if "token_usage" in score_dict:
- del score_dict["token_usage"]
-
- # Directly assign the dictionary to the state key
- state[f"{agent_name}_score"] = score_dict
-
- # Update summaries
- if "summaries" not in state:
- state["summaries"] = {}
-
- if "summary" in agent_result and agent_result["summary"]:
- state["summaries"][f"{agent_name}_score"] = agent_result["summary"]
-
- # Update flags
- if "flags" not in state:
- state["flags"] = []
-
- if "flags" in agent_result and isinstance(agent_result["flags"], list):
- state["flags"].extend(agent_result["flags"])
-
- # Note: Token usage is already directly handled by each agent via state["token_usage"]["{agent_name}_agent"]
- # So we don't need to do anything with token usage here
-
- return state
-
-
-class CoreContextAgent(BaseCapabilityMixin, VectorRetrievalCapability):
- """Core Context Agent evaluates proposals against DAO mission and standards."""
-
- def __init__(self, config: Optional[Dict[str, Any]] = None):
- """Initialize the Core Context Agent."""
- BaseCapabilityMixin.__init__(self, config=config, state_key="core_score")
- VectorRetrievalCapability.__init__(self)
- self.initialize()
- self._initialize_vector_capability()
-
- def _initialize_vector_capability(self):
- """Initialize the vector retrieval functionality."""
- if not hasattr(self, "retrieve_from_vector_store"):
- self.retrieve_from_vector_store = (
- VectorRetrievalCapability.retrieve_from_vector_store.__get__(
- self, self.__class__
- )
- )
- self.logger.info(
- "Initialized vector retrieval capability for CoreContextAgent"
- )
-
- async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
- """Evaluate the proposal against DAO core mission and standards."""
- self._initialize_vector_capability()
-
- proposal_id = state.get("proposal_id", "unknown")
- proposal_content = state.get("proposal_data", "")
-
- dao_mission_text = self.config.get("dao_mission", "")
- if not dao_mission_text:
- try:
- self.logger.debug(
- f"[DEBUG:CoreAgent:{proposal_id}] Attempting to retrieve DAO mission from vector store"
- )
- dao_mission = await self.retrieve_from_vector_store(
- query="DAO mission statement and values",
- collection_name=self.config.get(
- "mission_collection", "dao_documents"
- ),
- limit=3,
- )
- dao_mission_text = "\n".join([doc.page_content for doc in dao_mission])
- self.logger.debug(
- f"[DEBUG:CoreAgent:{proposal_id}] Retrieved DAO mission, length: {len(dao_mission_text)}"
- )
- except Exception as e:
- self.logger.error(
- f"[DEBUG:CoreAgent:{proposal_id}] Error retrieving DAO mission: {str(e)}",
- exc_info=True,
- )
- dao_mission_text = "Elevate human potential through AI on Bitcoin"
- self.logger.debug(
- f"[DEBUG:CoreAgent:{proposal_id}] Using default DAO mission: {dao_mission_text}"
- )
-
- prompt = PromptTemplate(
- input_variables=["proposal_data", "dao_mission"],
- template="""Evaluate the following proposal against the DAO's mission and values.\\n
-Proposal: {proposal_data}\\nDAO Mission: {dao_mission}\\n
-Assess whether this proposal aligns with the DAO's core mission and values.\\nConsider:\\n1. Mission Alignment: Does it directly support the stated mission?\\n2. Quality Standards: Does it meet quality requirements?\\n3. Innovation: Does it bring new ideas aligned with our vision?\\n4. Impact: How significant is its potential contribution?\\n
-# ADDED: Image processing instructions
-**Image Analysis Instructions:**
-If images are provided with this proposal (they will appear after this text), you MUST analyze them as an integral part of the proposal.
-- Relevance: Does each image directly relate to and support the proposal's text?
-- Evidence: Do the images provide visual evidence for claims made in the proposal?
-- Authenticity & Quality: Are the images clear, authentic, and not misleading or manipulated?
-- Cohesion: The images and text MUST form a cohesive and consistent whole. If any image contradicts the text, is irrelevant, misleading, of very poor quality, or inappropriate, you should consider this a significant flaw in the proposal.
-
-Provide a score from 0-100, flag any critical issues (including image-related ones), and summarize your findings, explicitly mentioning your image analysis if images were present.\\
- """,
- )
-
- try:
- self.logger.debug(
- f"[DEBUG:CoreAgent:{proposal_id}] Formatting prompt for evaluation"
- )
- formatted_prompt_text = prompt.format(
- proposal_data=proposal_content,
- dao_mission=dao_mission_text
- or "Elevate human potential through AI on Bitcoin",
- )
- debug_level = self.config.get("debug_level", 0)
- if debug_level >= 2:
- self.logger.debug(
- f"[PROPOSAL_DEBUG:CoreAgent] FULL EVALUATION PROMPT:\n{formatted_prompt_text}"
- )
- else:
- self.logger.debug(
- f"[PROPOSAL_DEBUG:CoreAgent] Generated evaluation prompt: {formatted_prompt_text}"
- )
- except Exception as e:
- self.logger.error(
- f"[DEBUG:CoreAgent:{proposal_id}] Error formatting prompt: {str(e)}",
- exc_info=True,
- )
- formatted_prompt_text = f"Evaluate proposal: {proposal_content}"
-
- try:
- self.logger.debug(
- f"[DEBUG:CoreAgent:{proposal_id}] Invoking LLM for core evaluation"
- )
-
- # ADDED: Image handling
- proposal_images_list = state.get("proposal_images", [])
- if not isinstance(proposal_images_list, list):
- self.logger.warning(
- f"[DEBUG:CoreAgent:{proposal_id}] proposal_images is not a list: {type(proposal_images_list)}. Defaulting to empty list."
- )
- proposal_images_list = []
-
- message_content_list = [{"type": "text", "text": formatted_prompt_text}]
- if proposal_images_list:
- self.logger.debug(
- f"[DEBUG:CoreAgent:{proposal_id}] Adding {len(proposal_images_list)} images to LLM input."
- )
- message_content_list.extend(proposal_images_list)
-
- llm_input_message = HumanMessage(content=message_content_list)
-
- result = await self.llm.with_structured_output(AgentOutput).ainvoke(
- [llm_input_message]
- )
- self.logger.debug(
- f"[DEBUG:CoreAgent:{proposal_id}] LLM returned core evaluation with score: {result.score}"
- )
- self.logger.info(
- f"[DEBUG:CoreAgent:{proposal_id}] SCORE={result.score}/100 | FLAGS={result.flags} | SUMMARY={result.summary}"
- )
-
- # Track token usage - extract directly from LLM if available
- token_usage_data = {
- "input_tokens": 0,
- "output_tokens": 0,
- "total_tokens": 0,
- }
-
- # Use the Annotated operator.add feature by assigning 1 to increment
- # This is safe with concurrent execution
- state["core_agent_invocations"] = 1
-
- # Try to extract token usage directly from LLM response
- if (
- hasattr(self.llm, "_last_prompt_id")
- and hasattr(self.llm, "client")
- and hasattr(self.llm.client, "usage_by_prompt_id")
- ):
- last_prompt_id = self.llm._last_prompt_id
- if last_prompt_id in self.llm.client.usage_by_prompt_id:
- usage = self.llm.client.usage_by_prompt_id[last_prompt_id]
- token_usage_data = {
- "input_tokens": usage.get("prompt_tokens", 0),
- "output_tokens": usage.get("completion_tokens", 0),
- "total_tokens": usage.get("total_tokens", 0),
- }
- self.logger.debug(
- f"[DEBUG:CoreAgent:{proposal_id}] Extracted token usage from LLM: {token_usage_data}"
- )
- # Fallback to estimation
- if token_usage_data["total_tokens"] == 0:
- # Get model name from LLM
- llm_model_name = getattr(self.llm, "model_name", "gpt-4.1")
- # First calculate token count from the text
- token_count = len(formatted_prompt_text) // 4 # Simple estimation
- # Create token usage dictionary for calculate_token_cost
- token_usage_dict = {"input_tokens": token_count}
- # Calculate cost
- cost_result = calculate_token_cost(token_usage_dict, llm_model_name)
- token_usage_data = {
- "input_tokens": token_count,
- "output_tokens": len(result.model_dump_json())
- // 4, # rough estimate
- "total_tokens": token_count + len(result.model_dump_json()) // 4,
- "model_name": llm_model_name, # Include model name
- }
- self.logger.debug(
- f"[DEBUG:CoreAgent:{proposal_id}] Estimated token usage: {token_usage_data}"
- )
-
- # Add token usage to state
- if "token_usage" not in state:
- state["token_usage"] = {}
- state["token_usage"]["core_agent"] = token_usage_data
-
- result_dict = result.model_dump()
- # Add token usage to result_dict so it's properly processed
- result_dict["token_usage"] = token_usage_data
-
- # Remove verbose debug logs and simply update state
- update_state_with_agent_result(state, result_dict, "core")
-
- return result_dict
- except Exception as e:
- self.logger.error(
- f"[DEBUG:CoreAgent:{proposal_id}] Error in core evaluation: {str(e)}",
- exc_info=True,
- )
- fallback_score_dict = {
- "score": 50,
- "flags": [f"Error: {str(e)}"],
- "summary": "Evaluation failed due to error",
- }
- self.logger.info(
- f"[DEBUG:CoreAgent:{proposal_id}] ERROR_SCORE=50/100 | FLAGS=[{str(e)}] | SUMMARY=Evaluation failed"
- )
- return fallback_score_dict
-
-
-class HistoricalContextAgent(BaseCapabilityMixin, VectorRetrievalCapability):
- """Historical Context Agent examines past proposals and patterns."""
-
- def __init__(self, config: Optional[Dict[str, Any]] = None):
- BaseCapabilityMixin.__init__(self, config=config, state_key="historical_score")
- VectorRetrievalCapability.__init__(self)
- self.initialize()
- self._initialize_vector_capability()
-
- def _initialize_vector_capability(self):
- if not hasattr(self, "retrieve_from_vector_store"):
- self.retrieve_from_vector_store = (
- VectorRetrievalCapability.retrieve_from_vector_store.__get__(
- self, self.__class__
- )
- )
- self.logger.info(
- "Initialized vector retrieval capability for HistoricalContextAgent"
- )
-
- async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
- proposal_id = state.get("proposal_id", "unknown")
- self._initialize_vector_capability()
- proposal_content = state.get("proposal_data", "")
-
- historical_text = ""
- try:
- self.logger.debug(
- f"[DEBUG:HistoricalAgent:{proposal_id}] Searching for similar proposals: {proposal_content[:50]}..."
- )
- similar_proposals = await self.retrieve_from_vector_store(
- query=f"Proposals similar to: {proposal_content}",
- collection_name=self.config.get(
- "proposals_collection", "past_proposals"
- ),
- limit=5,
- )
- historical_text = "\n".join([doc.page_content for doc in similar_proposals])
- self.logger.debug(
- f"[DEBUG:HistoricalAgent:{proposal_id}] Found {len(similar_proposals)} similar proposals"
- )
- except Exception as e:
- self.logger.error(
- f"[DEBUG:HistoricalAgent:{proposal_id}] Error retrieving historical proposals: {str(e)}",
- exc_info=True,
- )
- historical_text = "No similar historical proposals found."
- prompt = PromptTemplate(
- input_variables=["proposal_data", "historical_proposals"],
- template="""Analyze this proposal in the context of historical patterns and similar past proposals.\\n
-Current Proposal: {proposal_data}\\nSimilar Past Proposals: {historical_proposals}\\n
-Evaluate:\\n1. Precedent: Have similar proposals been approved or rejected?\\n2. Cross-DAO Similarities: How does this compare to proposals in similar DAOs?\\n3. Learning from Past: Does it address issues from past proposals?\\n4. Uniqueness: Is this novel or repeating past ideas?\\n
-# ADDED: Image processing instructions
-**Image Analysis Instructions:**
-If images are provided with this proposal (they will appear after this text), you MUST analyze them as an integral part of the proposal.
-- Relevance: Does each image directly relate to and support the proposal's text?
-- Evidence: Do the images provide visual evidence for claims made in the proposal?
-- Authenticity & Quality: Are the images clear, authentic, and not misleading or manipulated?
-- Cohesion: The images and text MUST form a cohesive and consistent whole. If any image contradicts the text, is irrelevant, misleading, of very poor quality, or inappropriate, you should consider this a significant flaw in the proposal.
-
-Provide a score from 0-100, flag any critical issues (including image-related ones), and summarize your findings, explicitly mentioning your image analysis if images were present.\\
- """,
- )
- try:
- self.logger.debug(
- f"[DEBUG:HistoricalAgent:{proposal_id}] Formatting prompt"
- )
- formatted_prompt_text = prompt.format(
- proposal_data=proposal_content,
- historical_proposals=historical_text
- or "No similar historical proposals found.",
- )
- except Exception as e:
- self.logger.error(
- f"[DEBUG:HistoricalAgent:{proposal_id}] Error formatting prompt: {str(e)}",
- exc_info=True,
- )
- formatted_prompt_text = f"Analyze proposal: {proposal_content}"
- try:
- self.logger.debug(
- f"[DEBUG:HistoricalAgent:{proposal_id}] Invoking LLM for historical evaluation"
- )
-
- # ADDED: Image handling
- proposal_images_list = state.get("proposal_images", [])
- if not isinstance(proposal_images_list, list):
- self.logger.warning(
- f"[DEBUG:HistoricalAgent:{proposal_id}] proposal_images is not a list: {type(proposal_images_list)}. Defaulting to empty list."
- )
- proposal_images_list = []
-
- message_content_list = [{"type": "text", "text": formatted_prompt_text}]
- if proposal_images_list:
- self.logger.debug(
- f"[DEBUG:HistoricalAgent:{proposal_id}] Adding {len(proposal_images_list)} images to LLM input."
- )
- message_content_list.extend(proposal_images_list)
-
- llm_input_message = HumanMessage(content=message_content_list)
-
- result = await self.llm.with_structured_output(AgentOutput).ainvoke(
- [llm_input_message]
- )
- self.logger.info(
- f"[DEBUG:HistoricalAgent:{proposal_id}] SCORE={result.score}/100 | FLAGS={result.flags} | SUMMARY={result.summary}"
- )
-
- # Track token usage - extract directly from LLM if available
- token_usage_data = {
- "input_tokens": 0,
- "output_tokens": 0,
- "total_tokens": 0,
- }
-
- # Try to extract token usage directly from LLM response
- if (
- hasattr(self.llm, "_last_prompt_id")
- and hasattr(self.llm, "client")
- and hasattr(self.llm.client, "usage_by_prompt_id")
- ):
- last_prompt_id = self.llm._last_prompt_id
- if last_prompt_id in self.llm.client.usage_by_prompt_id:
- usage = self.llm.client.usage_by_prompt_id[last_prompt_id]
- token_usage_data = {
- "input_tokens": usage.get("prompt_tokens", 0),
- "output_tokens": usage.get("completion_tokens", 0),
- "total_tokens": usage.get("total_tokens", 0),
- }
- self.logger.debug(
- f"[DEBUG:HistoricalAgent:{proposal_id}] Extracted token usage from LLM: {token_usage_data}"
- )
- # Fallback to estimation
- if token_usage_data["total_tokens"] == 0:
- # Get model name from LLM
- llm_model_name = getattr(self.llm, "model_name", "gpt-4.1")
- # First calculate token count from the text
- token_count = len(formatted_prompt_text) // 4 # Simple estimation
- # Create token usage dictionary for calculate_token_cost
- token_usage_dict = {"input_tokens": token_count}
- # Calculate cost
- cost_result = calculate_token_cost(token_usage_dict, llm_model_name)
- token_usage_data = {
- "input_tokens": token_count,
- "output_tokens": len(result.model_dump_json())
- // 4, # rough estimate
- "total_tokens": token_count + len(result.model_dump_json()) // 4,
- "model_name": llm_model_name, # Include model name
- }
- self.logger.debug(
- f"[DEBUG:HistoricalAgent:{proposal_id}] Estimated token usage: {token_usage_data}"
- )
-
- # Add token usage to state
- if "token_usage" not in state:
- state["token_usage"] = {}
- state["token_usage"]["historical_agent"] = token_usage_data
-
- result_dict = result.model_dump()
- # Add token usage to result_dict so it's properly processed
- result_dict["token_usage"] = token_usage_data
-
- # Update state with the result
- update_state_with_agent_result(state, result_dict, "historical")
- return result_dict
- except Exception as e:
- self.logger.error(
- f"[DEBUG:HistoricalAgent:{proposal_id}] Error in historical evaluation: {str(e)}",
- exc_info=True,
- )
- fallback_score_dict = {
- "score": 50,
- "flags": [f"Error: {str(e)}"],
- "summary": "Evaluation failed due to error",
- }
- self.logger.info(
- f"[DEBUG:HistoricalAgent:{proposal_id}] ERROR_SCORE=50/100 | FLAGS=[{str(e)}] | SUMMARY=Evaluation failed"
- )
- return fallback_score_dict
-
-
-class FinancialContextAgent(BaseCapabilityMixin):
- """Financial Context Agent evaluates treasury impact and financial viability."""
-
- def __init__(self, config: Optional[Dict[str, Any]] = None):
- super().__init__(config=config, state_key="financial_score")
- self.initialize()
-
- async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
- proposal_id = state.get("proposal_id", "unknown")
- treasury_balance = state.get(
- "treasury_balance", self.config.get("treasury_balance", 1000000)
- )
- proposal_content = state.get("proposal_data", "")
-
- prompt = PromptTemplate(
- input_variables=["proposal_data", "treasury_balance"],
- template="""Assess the financial aspects of this proposal.\\n
-Proposal: {proposal_data}\\nCurrent Treasury Balance: {treasury_balance}\\n
-Evaluate:\\n1. Cost-Benefit Analysis: Is the ROI reasonable?\\n2. Treasury Impact: What percentage of treasury would this use?\\n3. Budget Alignment: Does it align with budget priorities?\\n4. Projected Impact: What's the expected financial outcome?\\n5. Risk Assessment: What financial risks might arise?\\n
-# ADDED: Image processing instructions
-**Image Analysis Instructions:**
-If images are provided with this proposal (they will appear after this text), you MUST analyze them as an integral part of the proposal.
-- Relevance: Does each image directly relate to and support the proposal's text?
-- Evidence: Do the images provide visual evidence for claims made in the proposal (e.g., screenshots of transactions, diagrams of financial models if applicable)?
-- Authenticity & Quality: Are the images clear, authentic, and not misleading or manipulated?
-- Cohesion: The images and text MUST form a cohesive and consistent whole. If any image contradicts the text, is irrelevant, misleading, of very poor quality, or inappropriate, you should consider this a significant flaw in the proposal.
-
-Provide a score from 0-100, flag any critical issues (including image-related ones), and summarize your findings, explicitly mentioning your image analysis if images were present.\\
- """,
- )
- try:
- self.logger.debug(
- f"[DEBUG:FinancialAgent:{proposal_id}] Formatting prompt for financial evaluation"
- )
- formatted_prompt_text = prompt.format(
- proposal_data=proposal_content,
- treasury_balance=treasury_balance,
- )
- except Exception as e:
- self.logger.error(
- f"[DEBUG:FinancialAgent:{proposal_id}] Error formatting prompt: {str(e)}",
- exc_info=True,
- )
- formatted_prompt_text = (
- f"Assess financial aspects of proposal: {proposal_content}"
- )
- try:
- self.logger.debug(
- f"[DEBUG:FinancialAgent:{proposal_id}] Invoking LLM for financial evaluation"
- )
-
- # ADDED: Image handling
- proposal_images = state.get("proposal_images", [])
- message_content_list = [{"type": "text", "text": formatted_prompt_text}]
- if proposal_images:
- logger.debug(
- f"[DEBUG:FinancialAgent:{proposal_id}] Adding {len(proposal_images)} images to LLM input."
- )
- message_content_list.extend(proposal_images)
-
- llm_input_message = HumanMessage(content=message_content_list)
-
- result = await self.llm.with_structured_output(AgentOutput).ainvoke(
- [llm_input_message]
- )
- self.logger.info(
- f"[DEBUG:FinancialAgent:{proposal_id}] SCORE={result.score}/100 | FLAGS={result.flags} | SUMMARY={result.summary}"
- )
-
- # Track token usage - extract directly from LLM if available
- token_usage_data = {
- "input_tokens": 0,
- "output_tokens": 0,
- "total_tokens": 0,
- }
-
- # Try to extract token usage directly from LLM response
- if (
- hasattr(self.llm, "_last_prompt_id")
- and hasattr(self.llm, "client")
- and hasattr(self.llm.client, "usage_by_prompt_id")
- ):
- last_prompt_id = self.llm._last_prompt_id
- if last_prompt_id in self.llm.client.usage_by_prompt_id:
- usage = self.llm.client.usage_by_prompt_id[last_prompt_id]
- token_usage_data = {
- "input_tokens": usage.get("prompt_tokens", 0),
- "output_tokens": usage.get("completion_tokens", 0),
- "total_tokens": usage.get("total_tokens", 0),
- }
- self.logger.debug(
- f"[DEBUG:FinancialAgent:{proposal_id}] Extracted token usage from LLM: {token_usage_data}"
- )
- # Fallback to estimation
- if token_usage_data["total_tokens"] == 0:
- # Get model name from LLM
- llm_model_name = getattr(self.llm, "model_name", "gpt-4.1")
- # First calculate token count from the text
- token_count = len(formatted_prompt_text) // 4 # Simple estimation
- # Create token usage dictionary for calculate_token_cost
- token_usage_dict = {"input_tokens": token_count}
- # Calculate cost
- cost_result = calculate_token_cost(token_usage_dict, llm_model_name)
- token_usage_data = {
- "input_tokens": token_count,
- "output_tokens": len(result.model_dump_json())
- // 4, # rough estimate
- "total_tokens": token_count + len(result.model_dump_json()) // 4,
- "model_name": llm_model_name, # Include model name
- }
- self.logger.debug(
- f"[DEBUG:FinancialAgent:{proposal_id}] Estimated token usage: {token_usage_data}"
- )
-
- # Add token usage to state
- if "token_usage" not in state:
- state["token_usage"] = {}
- state["token_usage"]["financial_agent"] = token_usage_data
-
- result_dict = result.model_dump()
- # Add token usage to result_dict so it's properly processed
- result_dict["token_usage"] = token_usage_data
-
- # Update state with the result
- update_state_with_agent_result(state, result_dict, "financial")
- return result_dict
- except Exception as e:
- self.logger.error(
- f"[DEBUG:FinancialAgent:{proposal_id}] Error in financial evaluation: {str(e)}",
- exc_info=True,
- )
- fallback_score_dict = {
- "score": 50,
- "flags": [f"Error: {str(e)}"],
- "summary": "Evaluation failed due to error",
- }
- self.logger.info(
- f"[DEBUG:FinancialAgent:{proposal_id}] ERROR_SCORE=50/100 | FLAGS=[{str(e)}] | SUMMARY=Evaluation failed"
- )
- return fallback_score_dict
-
-
-class ImageProcessingNode(BaseCapabilityMixin):
- """A workflow node to process proposal images: extract URLs, download, and base64 encode."""
-
- def __init__(self, config: Optional[Dict[str, Any]] = None):
- super().__init__(config=config, state_key="proposal_images")
- self.initialize()
-
- async def process(self, state: ProposalEvaluationState) -> List[Dict[str, Any]]:
- """The core logic for processing images, returns the list of processed image dicts directly."""
- proposal_id = state.get("proposal_id", "unknown")
- proposal_data_str = state.get("proposal_data", "")
-
- if not proposal_data_str:
- self.logger.info(
- f"[ImageProcessorNode:{proposal_id}] No proposal_data string, skipping image processing."
- )
- return [] # Return empty list, not None
-
- self.logger.info(
- f"[ImageProcessorNode:{proposal_id}] Starting image processing."
- )
- image_urls = extract_image_urls(proposal_data_str)
-
- if not image_urls:
- self.logger.info(
- f"[ImageProcessorNode:{proposal_id}] No image URLs found in proposal data."
- )
- return [] # Return empty list, not None
-
- self.logger.info(
- f"[ImageProcessorNode:{proposal_id}] Found {len(image_urls)} image URLs: {image_urls}"
- )
-
- processed_images = []
- async with httpx.AsyncClient() as client:
- for url in image_urls:
- try:
- self.logger.debug(
- f"[ImageProcessorNode:{proposal_id}] Downloading image from {url}"
- )
- response = await client.get(url, timeout=10.0)
- response.raise_for_status()
- image_data = base64.b64encode(response.content).decode("utf-8")
- mime_type = "image/jpeg"
- if url.lower().endswith((".jpg", ".jpeg")):
- mime_type = "image/jpeg"
- elif url.lower().endswith(".png"):
- mime_type = "image/png"
- elif url.lower().endswith(".gif"):
- mime_type = "image/gif"
- elif url.lower().endswith(".webp"):
- mime_type = "image/webp"
-
- processed_images.append(
- {
- "type": "image_url",
- "image_url": {
- "url": f"data:{mime_type};base64,{image_data}"
- },
- }
- )
- self.logger.debug(
- f"[ImageProcessorNode:{proposal_id}] Successfully processed image from {url}"
- )
- except httpx.HTTPStatusError as e:
- self.logger.error(
- f"[ImageProcessorNode:{proposal_id}] HTTP error for {url}: {e.response.status_code}",
- exc_info=False,
- )
- except httpx.RequestError as e:
- self.logger.error(
- f"[ImageProcessorNode:{proposal_id}] Request error for {url}: {str(e)}",
- exc_info=False,
- )
- except Exception as e:
- self.logger.error(
- f"[ImageProcessorNode:{proposal_id}] Generic error for {url}: {str(e)}",
- exc_info=True,
- )
-
- self.logger.info(
- f"[ImageProcessorNode:{proposal_id}] Finished. {len(processed_images)} images processed."
- )
- return processed_images # This will be a list, possibly empty
-
-
-class SocialContextAgent(BaseCapabilityMixin, WebSearchCapability):
- """Social Context Agent gauges community sentiment and social impact."""
-
- def __init__(self, config: Optional[Dict[str, Any]] = None):
- BaseCapabilityMixin.__init__(self, config=config, state_key="social_score")
- WebSearchCapability.__init__(self)
- self.initialize()
- self._initialize_web_search_capability()
-
- def _initialize_web_search_capability(self):
- if not hasattr(self, "search_web"):
- self.search_web = WebSearchCapability.search_web.__get__(
- self, self.__class__
- )
- self.logger.info("Initialized web search capability for SocialContextAgent")
-
- async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
- proposal_id = state.get("proposal_id", "unknown")
- self._initialize_web_search_capability()
- proposal_content = state.get("proposal_data", "")
-
- social_context = ""
- if self.config.get("enable_web_search", True):
- try:
- search_query = (
- f"Community sentiment {proposal_content[:50]} cryptocurrency DAO"
- )
- self.logger.debug(
- f"[DEBUG:SocialAgent:{proposal_id}] Performing web search: {search_query}"
- )
- search_results, web_search_token_usage = await self.search_web(
- query=search_query,
- num_results=3,
- )
- social_context = "\n".join(
- [f"{r.get('page_content', '')}" for r in search_results]
- )
- self.logger.debug(
- f"[DEBUG:SocialAgent:{proposal_id}] Found {len(search_results)} web search results"
- )
-
- # Store web search token usage
- if "token_usage" not in state:
- state["token_usage"] = {}
- state["token_usage"]["social_web_search"] = web_search_token_usage
-
- except Exception as e:
- logger.error(
- f"[DEBUG:SocialAgent:{proposal_id}] Web search failed: {str(e)}",
- exc_info=True,
- )
- social_context = "Web search unavailable."
- prompt = PromptTemplate(
- input_variables=["proposal_data", "social_context"],
- template="""Gauge the community sentiment and social impact of this proposal.\\n
-Proposal: {proposal_data}\\nSocial Context: {social_context}\\n
-Evaluate:\\n1. Community Sentiment: How might members perceive this?\\n2. Social Media Presence: Any discussions online about this?\\n3. Engagement Potential: Will this engage the community?\\n4. Cross-Platform Analysis: How does sentiment vary across platforms?\\n5. Social Risk: Any potential for controversy or division?\\n
-# ADDED: Image processing instructions
-**Image Analysis Instructions:**
-If images are provided with this proposal (they will appear after this text), you MUST analyze them as an integral part of the proposal.
-- Relevance: Does each image directly relate to and support the proposal's text or the community/social aspects being discussed?
-- Evidence: Do the images provide visual evidence for claims made (e.g., screenshots of community discussions, mockups of social impact visuals)?
-- Authenticity & Quality: Are the images clear, authentic, and not misleading or manipulated?
-- Cohesion: The images and text MUST form a cohesive and consistent whole. If any image contradicts the text, is irrelevant, misleading, of very poor quality, or inappropriate, you should consider this a significant flaw in the proposal.
-
-Provide a score from 0-100, flag any critical issues (including image-related ones), and summarize your findings, explicitly mentioning your image analysis if images were present.\\
- """,
- )
- try:
- self.logger.debug(
- f"[DEBUG:SocialAgent:{proposal_id}] Formatting prompt for social evaluation"
- )
- formatted_prompt_text = prompt.format(
- proposal_data=proposal_content,
- social_context=social_context,
- )
- except Exception as e:
- self.logger.error(
- f"[DEBUG:SocialAgent:{proposal_id}] Error formatting prompt: {str(e)}",
- exc_info=True,
- )
- formatted_prompt_text = (
- f"Gauge social impact of proposal: {proposal_content}"
- )
- try:
- self.logger.debug(
- f"[DEBUG:SocialAgent:{proposal_id}] Invoking LLM for social evaluation"
- )
-
- # ADDED: Image handling
- proposal_images_list = state.get("proposal_images", [])
- if not isinstance(proposal_images_list, list):
- self.logger.warning(
- f"[DEBUG:SocialAgent:{proposal_id}] proposal_images is not a list: {type(proposal_images_list)}. Defaulting to empty list."
- )
- proposal_images_list = []
-
- message_content_list = [{"type": "text", "text": formatted_prompt_text}]
- if proposal_images_list:
- self.logger.debug(
- f"[DEBUG:SocialAgent:{proposal_id}] Adding {len(proposal_images_list)} images to LLM input."
- )
- message_content_list.extend(proposal_images_list)
-
- llm_input_message = HumanMessage(content=message_content_list)
-
- result = await self.llm.with_structured_output(AgentOutput).ainvoke(
- [llm_input_message]
- )
- self.logger.info(
- f"[DEBUG:SocialAgent:{proposal_id}] SCORE={result.score}/100 | FLAGS={result.flags} | SUMMARY={result.summary}"
- )
-
- # Track token usage - extract directly from LLM if available
- token_usage_data = {
- "input_tokens": 0,
- "output_tokens": 0,
- "total_tokens": 0,
- }
-
- # Try to extract token usage directly from LLM response
- if (
- hasattr(self.llm, "_last_prompt_id")
- and hasattr(self.llm, "client")
- and hasattr(self.llm.client, "usage_by_prompt_id")
- ):
- last_prompt_id = self.llm._last_prompt_id
- if last_prompt_id in self.llm.client.usage_by_prompt_id:
- usage = self.llm.client.usage_by_prompt_id[last_prompt_id]
- token_usage_data = {
- "input_tokens": usage.get("prompt_tokens", 0),
- "output_tokens": usage.get("completion_tokens", 0),
- "total_tokens": usage.get("total_tokens", 0),
- }
- self.logger.debug(
- f"[DEBUG:SocialAgent:{proposal_id}] Extracted token usage from LLM: {token_usage_data}"
- )
- # Fallback to estimation
- if token_usage_data["total_tokens"] == 0:
- # Get model name from LLM
- llm_model_name = getattr(self.llm, "model_name", "gpt-4.1")
- # First calculate token count from the text
- token_count = len(formatted_prompt_text) // 4 # Simple estimation
- # Create token usage dictionary for calculate_token_cost
- token_usage_dict = {"input_tokens": token_count}
- # Calculate cost
- cost_result = calculate_token_cost(token_usage_dict, llm_model_name)
- token_usage_data = {
- "input_tokens": token_count,
- "output_tokens": len(result.model_dump_json())
- // 4, # rough estimate
- "total_tokens": token_count + len(result.model_dump_json()) // 4,
- "model_name": llm_model_name, # Include model name
- }
- self.logger.debug(
- f"[DEBUG:SocialAgent:{proposal_id}] Estimated token usage: {token_usage_data}"
- )
-
- # Add token usage to state
- if "token_usage" not in state:
- state["token_usage"] = {}
- state["token_usage"]["social_agent"] = token_usage_data
-
- result_dict = result.model_dump()
- # Add token usage to result_dict so it's properly processed
- result_dict["token_usage"] = token_usage_data
-
- # Update state with the result
- update_state_with_agent_result(state, result_dict, "social")
- return result_dict
- except Exception as e:
- self.logger.error(
- f"[DEBUG:SocialAgent:{proposal_id}] Error in social evaluation: {str(e)}",
- exc_info=True,
- )
- fallback_score_dict = {
- "score": 50,
- "flags": [f"Error: {str(e)}"],
- "summary": "Evaluation failed due to error",
- }
- self.logger.info(
- f"[DEBUG:SocialAgent:{proposal_id}] ERROR_SCORE=50/100 | FLAGS=[{str(e)}] | SUMMARY=Evaluation failed"
- )
- return fallback_score_dict
-
-
-class ReasoningAgent(BaseCapabilityMixin, PlanningCapability):
- """Configuration & Reasoning Agent synthesizes evaluations and makes decisions."""
-
- def __init__(self, config: Optional[Dict[str, Any]] = None):
- """Initialize the Reasoning Agent."""
- BaseCapabilityMixin.__init__(self, config=config, state_key="final_score")
- self.initialize()
- planning_queue = asyncio.Queue()
- callback_handler = self.config.get(
- "callback_handler"
- ) or StreamingCallbackHandler(planning_queue)
- PlanningCapability.__init__(
- self,
- callback_handler=callback_handler,
- planning_llm=ChatOpenAI(
- model=self.config.get("planning_model", "gpt-4.1-mini")
- ),
- persona="DAO Proposal Evaluator",
- )
- self._initialize_planning_capability()
-
- def _initialize_planning_capability(self):
- """Initialize planning capability methods."""
- if not hasattr(self, "create_plan"):
- self.create_plan = PlanningCapability.create_plan.__get__(
- self, self.__class__
- )
- self.logger.info("Initialized planning capability for ReasoningAgent")
-
- def integrate_with_graph(self, graph: StateGraph, **kwargs) -> None:
- """Integrate planning capability with the graph."""
- pass
-
- async def process(self, state: ProposalEvaluationState) -> Dict[str, Any]:
- proposal_id = state.get("proposal_id", "unknown")
- self._initialize_planning_capability()
- proposal_content = state.get("proposal_data", "")
- self.logger.debug(
- f"[DEBUG:ReasoningAgent:{proposal_id}] Beginning final evaluation processing with proposal_content (length: {len(proposal_content)})"
- )
-
- def safe_get_score(value, default=0):
- if isinstance(value, dict) and "score" in value:
- return value.get("score", default)
- elif isinstance(value, int):
- return value
- return default
-
- core_score = state.get("core_score", {})
- historical_score = state.get("historical_score", {})
- financial_score = state.get("financial_score", {})
- social_score = state.get("social_score", {})
-
- core_score_val = safe_get_score(core_score)
- historical_score_val = safe_get_score(historical_score)
- financial_score_val = safe_get_score(financial_score)
- social_score_val = safe_get_score(social_score)
-
- self.logger.debug(
- f"[DEBUG:ReasoningAgent:{proposal_id}] Input scores: Core={core_score_val}, Historical={historical_score_val}, Financial={financial_score_val}, Social={social_score_val}"
- )
-
- scores = {
- "Core Context": core_score_val,
- "Historical Context": historical_score_val,
- "Financial Context": financial_score_val,
- "Social Context": social_score_val,
- }
- summaries = state.get("summaries", {})
- flags = state.get("flags", [])
-
- self.logger.debug(
- f"[DEBUG:ReasoningAgent:{proposal_id}] Summaries: {summaries}"
- )
-
- self.logger.debug(f"[DEBUG:ReasoningAgent:{proposal_id}] Flags raised: {flags}")
-
- # Update the summaries with the content from each agent's evaluation
- if isinstance(core_score, dict) and "summary" in core_score:
- summaries["core_score"] = core_score["summary"]
- if isinstance(historical_score, dict) and "summary" in historical_score:
- summaries["historical_score"] = historical_score["summary"]
- if isinstance(financial_score, dict) and "summary" in financial_score:
- summaries["financial_score"] = financial_score["summary"]
- if isinstance(social_score, dict) and "summary" in social_score:
- summaries["social_score"] = social_score["summary"]
-
- # Update flags
- for score_obj in [core_score, historical_score, financial_score, social_score]:
- if (
- isinstance(score_obj, dict)
- and "flags" in score_obj
- and isinstance(score_obj["flags"], list)
- ):
- flags.extend(score_obj["flags"])
-
- prompt = PromptTemplate(
- input_variables=["proposal_data", "scores", "summaries", "flags"],
- template="""Synthesize all evaluations and make a final decision on this proposal.\\n
-Proposal: {proposal_data}\\n
-Evaluations:\\n- Core Context (Score: {scores[Core Context]}): {summaries[core_score]}\\n- Historical Context (Score: {scores[Historical Context]}): {summaries[historical_score]}\\n- Financial Context (Score: {scores[Financial Context]}): {summaries[financial_score]}\\n- Social Context (Score: {scores[Social Context]}): {summaries[social_score]}\\n
-Flags Raised: {flags}\\n
-Synthesize these evaluations to:\\n1. Weigh the importance of each context\\n2. Calibrate confidence based on available information\\n3. Consider the implications of the flags raised\\n4. Make a final decision: Approve or Reject\\n5. Calculate an overall score\\n
-Provide a final score, decision (Approve/Reject), and detailed explanation.\\n
- """,
- )
-
- try:
- for key in [
- "core_score",
- "historical_score",
- "financial_score",
- "social_score",
- ]:
- if key not in summaries:
- summaries[key] = "No evaluation available"
-
- self.logger.debug(
- f"[DEBUG:ReasoningAgent:{proposal_id}] Formatting final evaluation prompt"
- )
- formatted_prompt_text = prompt.format(
- proposal_data=proposal_content,
- scores=scores,
- summaries=summaries,
- flags=", ".join(flags) if flags else "None",
- )
- except Exception as e:
- self.logger.error(
- f"[DEBUG:ReasoningAgent:{proposal_id}] Error formatting prompt: {str(e)}",
- exc_info=True,
- )
- formatted_prompt_text = f"""Synthesize evaluations for proposal: {proposal_content}
-Scores: {scores}
-Flags: {flags}
-Provide a final score, decision (Approve/Reject), and explanation."""
-
- try:
- self.logger.debug(
- f"[DEBUG:ReasoningAgent:{proposal_id}] Invoking LLM for final decision"
- )
- result = await self.llm.with_structured_output(FinalOutput).ainvoke(
- [formatted_prompt_text]
- )
-
- self.logger.info(
- f"[DEBUG:ReasoningAgent:{proposal_id}] FINAL DECISION: {result.decision} | SCORE={result.score}/100 | EXPLANATION={result.explanation}"
- )
-
- # Track token usage - extract directly from LLM if available
- token_usage_data = {
- "input_tokens": 0,
- "output_tokens": 0,
- "total_tokens": 0,
- }
-
- # Try to extract token usage directly from LLM response
- if (
- hasattr(self.llm, "_last_prompt_id")
- and hasattr(self.llm, "client")
- and hasattr(self.llm.client, "usage_by_prompt_id")
- ):
- last_prompt_id = self.llm._last_prompt_id
- if last_prompt_id in self.llm.client.usage_by_prompt_id:
- usage = self.llm.client.usage_by_prompt_id[last_prompt_id]
- token_usage_data = {
- "input_tokens": usage.get("prompt_tokens", 0),
- "output_tokens": usage.get("completion_tokens", 0),
- "total_tokens": usage.get("total_tokens", 0),
- }
- self.logger.debug(
- f"[DEBUG:ReasoningAgent:{proposal_id}] Extracted token usage from LLM: {token_usage_data}"
- )
- # Fallback to estimation
- if token_usage_data["total_tokens"] == 0:
- # Get model name from LLM
- llm_model_name = getattr(self.llm, "model_name", "gpt-4.1")
- # First calculate token count from the text
- token_count = len(formatted_prompt_text) // 4 # Simple estimation
- # Create token usage dictionary for calculate_token_cost
- token_usage_dict = {"input_tokens": token_count}
- # Calculate cost
- cost_result = calculate_token_cost(token_usage_dict, llm_model_name)
- token_usage_data = {
- "input_tokens": token_count,
- "output_tokens": len(result.model_dump_json())
- // 4, # rough estimate
- "total_tokens": token_count + len(result.model_dump_json()) // 4,
- "model_name": llm_model_name, # Include model name
- }
- self.logger.debug(
- f"[DEBUG:ReasoningAgent:{proposal_id}] Estimated token usage: {token_usage_data}"
- )
-
- # Add token usage to state
- if "token_usage" not in state:
- state["token_usage"] = {}
- state["token_usage"]["reasoning_agent"] = token_usage_data
-
- result_dict = result.model_dump()
- # Add token usage to result_dict so it's properly processed
- result_dict["token_usage"] = token_usage_data
-
- # Update state with the result
- update_state_with_agent_result(state, result_dict, "reasoning")
- return result_dict
- except Exception as e:
- self.logger.error(
- f"[DEBUG:ReasoningAgent:{proposal_id}] Error in final evaluation: {str(e)}",
- exc_info=True,
- )
- self.logger.info(
- f"[DEBUG:ReasoningAgent:{proposal_id}] ERROR_SCORE=50/100 | DECISION=Pending | REASON=Error: {str(e)}"
- )
- return {
- "score": 50,
- "decision": "Pending",
- "explanation": f"Unable to make final decision due to error: {str(e)}",
- }
-
-
class ProposalEvaluationWorkflow(BaseWorkflow[ProposalEvaluationState]):
"""Main workflow for evaluating DAO proposals using a hierarchical team."""
def __init__(self, config: Optional[Dict[str, Any]] = None):
- """Initialize the proposal evaluation workflow."""
+ """Initialize the proposal evaluation workflow.
+
+ Args:
+ config: Optional configuration dictionary
+ """
super().__init__()
self.config = config or {}
self.hierarchical_workflow = HierarchicalTeamWorkflow(
@@ -1239,20 +87,18 @@ def __init__(self, config: Optional[Dict[str, Any]] = None):
},
)
- # Instantiate and add the new ImageProcessingNode
- image_processor_agent = ImageProcessingNode(
- config=self.config
- ) # Use self.config
- self.hierarchical_workflow.add_sub_workflow(
- "image_processor", image_processor_agent
- )
-
+ # Initialize agents
+ image_processor_agent = ImageProcessingNode(config=self.config)
core_agent = CoreContextAgent(self.config)
historical_agent = HistoricalContextAgent(self.config)
financial_agent = FinancialContextAgent(self.config)
social_agent = SocialContextAgent(self.config)
reasoning_agent = ReasoningAgent(self.config)
+ # Add agents to the workflow
+ self.hierarchical_workflow.add_sub_workflow(
+ "image_processor", image_processor_agent
+ )
self.hierarchical_workflow.add_sub_workflow("core_agent", core_agent)
self.hierarchical_workflow.add_sub_workflow(
"historical_agent", historical_agent
@@ -1261,162 +107,161 @@ def __init__(self, config: Optional[Dict[str, Any]] = None):
self.hierarchical_workflow.add_sub_workflow("social_agent", social_agent)
self.hierarchical_workflow.add_sub_workflow("reasoning_agent", reasoning_agent)
+ # Set entry point and other workflow properties
self.hierarchical_workflow.set_entry_point("image_processor")
+ self.hierarchical_workflow.set_supervisor_logic(self._supervisor_logic)
+ self.hierarchical_workflow.set_halt_condition(self._halt_condition)
+ self.required_fields = ["proposal_id", "proposal_data"]
+
+ def _supervisor_logic(
+ self, state: ProposalEvaluationState
+ ) -> Union[str, List[str]]:
+ """Determine which agent(s) to run next based on current state.
- def supervisor_logic(state: ProposalEvaluationState) -> Union[str, List[str]]:
- """Determine the next step in the workflow."""
- proposal_id = state.get("proposal_id", "unknown")
+ Args:
+ state: Current workflow state
- # Debugging current state view for supervisor
+ Returns:
+ String or list of strings identifying next agent(s) to run
+ """
+ # Initialize core agent invocations counter if not present
+ if "core_agent_invocations" not in state:
+ state["core_agent_invocations"] = 0
+
+ # Debug counter behavior
+ logger.debug(
+ f"[DEBUG:CoreCounter] Current invocations count: {state.get('core_agent_invocations', 0)}"
+ )
+
+ # Check if state has images processed
+ # If proposal_images key doesn't exist, we need to process images
+ # If it exists (even if it's an empty list), we consider images processed
+ if "proposal_images" not in state:
+ logger.debug("[DEBUG:SupervisorLogic] Need to process images first")
+ # Seed proposal_images from any prior image_processor output (or an empty
+ # list) so the key always exists and the supervisor cannot loop on this branch
+ result = state.get("image_processor", [])
+ if isinstance(result, list):
+ state["proposal_images"] = result
+ else:
+ # Non-list output: fall back to an empty list so the key still exists
+ state["proposal_images"] = []
+ return "image_processor"
+
+ # Check if core context evaluation is done
+ if "core_score" not in state:
+ logger.debug("[DEBUG:SupervisorLogic] Need core context evaluation")
+ old_count = state.get("core_agent_invocations", 0)
+ state["core_agent_invocations"] = old_count + 1
logger.debug(
- f"[DEBUG:Supervisor:{proposal_id}] Evaluating next step. State keys: {list(state.keys())}. "
- f"proposal_images set: {'proposal_images' in state}, "
- f"core_score set: {state.get('core_score') is not None}, "
- f"historical_score set: {state.get('historical_score') is not None}, "
- f"financial_score set: {state.get('financial_score') is not None}, "
- f"social_score set: {state.get('social_score') is not None}, "
- f"final_score set: {state.get('final_score') is not None}"
+ f"[DEBUG:CoreCounter] Incremented invocations: {old_count} -> {state['core_agent_invocations']}"
)
+ return "core_agent"
- if state.get("halt", False):
- logger.debug(
- f"[DEBUG:Supervisor:{proposal_id}] Halt condition met, returning END"
- )
- return END
-
- # After image_processor (entry point), if core_score isn't set, go to core_agent.
- # The image_processor node output (even if empty list for images) should be in state.
- if state.get("core_score") is None:
- # This will be the first check after image_processor completes as it's the entry point.
- current_core_invocations = state.get("core_agent_invocations", 0)
- if current_core_invocations > 3:
- logger.error(
- f"[DEBUG:Supervisor:{proposal_id}] Core agent invoked too many times ({current_core_invocations}), halting."
- )
- return END
-
- # Do not manually increment core_agent_invocations - the langgraph framework will handle this
- # with the Annotated type we restored
-
- logger.debug(
- f"[DEBUG:Supervisor:{proposal_id}] Routing to core_agent (core_score is None, invocation #{current_core_invocations})."
- )
- return "core_agent"
+ # Run specialized agents in parallel if they haven't run yet
+ agents_to_run = []
- if state.get("historical_score") is None:
- logger.debug(
- f"[DEBUG:Supervisor:{proposal_id}] Routing to historical_agent."
- )
- return "historical_agent"
-
- if (
- state.get("financial_score") is None
- or state.get("social_score") is None
- ):
- parallel_nodes = []
- if state.get("financial_score") is None:
- parallel_nodes.append("financial_agent")
- if state.get("social_score") is None:
- parallel_nodes.append("social_agent")
- logger.debug(
- f"[DEBUG:Supervisor:{proposal_id}] Initiating parallel execution of {parallel_nodes}"
- )
- return parallel_nodes
+ if "historical_score" not in state:
+ agents_to_run.append("historical_agent")
- if state.get("final_score") is None:
- logger.debug(
- f"[DEBUG:Supervisor:{proposal_id}] All scores available but final score is None, routing to reasoning_agent"
- )
- return "reasoning_agent"
+ if "financial_score" not in state:
+ agents_to_run.append("financial_agent")
+
+ if "social_score" not in state:
+ agents_to_run.append("social_agent")
+
+ if agents_to_run:
+ logger.debug(
+ f"[DEBUG:SupervisorLogic] Running specialized agents: {agents_to_run}"
+ )
+ return agents_to_run
+ # If all specialized agents have run, run the reasoning agent for final decision
+ if "final_score" not in state:
logger.debug(
- f"[DEBUG:Supervisor:{proposal_id}] All scores completed, returning END"
+ "[DEBUG:SupervisorLogic] All specialized agents done, running reasoning agent"
+ )
+ logger.info(
+ f"[DEBUG:DIAGNOSIS] About to run reasoning_agent, state keys: {list(state.keys())}"
)
- return END
+ return "reasoning_agent"
- self.hierarchical_workflow.set_supervisor_logic(supervisor_logic)
+ # If reasoning agent has run, we're done
+ logger.debug("[DEBUG:SupervisorLogic] Workflow complete")
- def halt_condition(state: ProposalEvaluationState) -> bool:
- """Check if workflow should halt."""
- proposal_id = state.get("proposal_id", "unknown")
+ # Add diagnosis logging
+ logger.info(
+ f"[DEBUG:DIAGNOSIS] Workflow complete, final_score type: {type(state.get('final_score'))}"
+ )
+ logger.info(
+ f"[DEBUG:DIAGNOSIS] Final score contents: {state.get('final_score')}"
+ )
- if state.get("halt", False):
- logger.debug(
- f"[DEBUG:HaltCondition:{proposal_id}] Halting workflow due to explicit halt flag"
- )
- return True
+ # Log the entire state and final reasoning as JSON
+ import json
- # Check for excessive core agent invocations
- if state.get("core_agent_invocations", 0) > 3:
- logger.debug(
- f"[DEBUG:HaltCondition:{proposal_id}] Halting workflow due to excessive core agent invocations: {state.get('core_agent_invocations', 0)}"
- )
- return True
+ logger.info(f"[DEBUG:FinalState] {json.dumps(state, default=str, indent=2)}")
- recursion_count = state.get("recursion_count", 0)
- if recursion_count > 8:
- logger.debug(
- f"[DEBUG:HaltCondition:{proposal_id}] Halting workflow - possible loop detected after {recursion_count} iterations"
- )
- return True
-
- if (
- state.get("core_score") is not None
- and state.get("historical_score") is not None
- and state.get("financial_score") is not None
- and state.get("social_score") is not None
- and state.get("final_score") is None
- and recursion_count > 3
- ):
- logger.debug(
- f"[DEBUG:HaltCondition:{proposal_id}] Halting workflow - reasoning agent appears to be failing after {recursion_count} attempts"
- )
- return True
+ return END
- state["recursion_count"] = recursion_count + 1
- logger.debug(
- f"[DEBUG:HaltCondition:{proposal_id}] Incrementing recursion counter to {state['recursion_count']}"
- )
+ def _halt_condition(self, state: ProposalEvaluationState) -> bool:
+ """Determine if the workflow should halt early.
- return False
+ Args:
+ state: Current workflow state
- self.hierarchical_workflow.set_halt_condition(halt_condition)
- self.required_fields = ["proposal_id", "proposal_data"]
+ Returns:
+ True if workflow should halt, False otherwise
+ """
+ # Halt if explicitly set
+ if state.get("halt", False):
+ logger.info("[DEBUG:HaltCondition] Halting due to explicit halt flag")
+ return True
+
+ # Halt if we've run the core agent too many times (prevent loops)
+ core_agent_invocations = state.get("core_agent_invocations", 0)
+ max_core_invocations = 50
+ if core_agent_invocations > max_core_invocations:
+ logger.warning(
+ f"[DEBUG:HaltCondition] Halting due to too many core agent invocations: {core_agent_invocations}"
+ )
+ state["flags"] = state.get("flags", []) + [
+ f"Workflow halted: Too many core agent invocations ({core_agent_invocations})"
+ ]
+ return True
+
+ # Don't halt by default
+ return False
def _create_prompt(self) -> PromptTemplate:
- """Create the main workflow prompt."""
- return PromptTemplate(
- input_variables=["proposal_data"],
- template="Evaluate the DAO proposal: {proposal_data}",
+ """Create the base prompt for the workflow."""
+ raise NotImplementedError(
+ "This method is not used in the hierarchical workflow"
)
def _create_graph(self) -> StateGraph:
- """Create the workflow graph."""
+ """Create the workflow graph.
+
+ Returns:
+ The constructed state graph
+ """
return self.hierarchical_workflow.build_graph()
def _validate_state(self, state: ProposalEvaluationState) -> bool:
- """Validate the workflow state."""
- if not super()._validate_state(state):
- return False
-
- if "flags" not in state:
- state["flags"] = []
- elif state["flags"] is None:
- state["flags"] = []
-
- if "summaries" not in state:
- state["summaries"] = {}
- elif state["summaries"] is None:
- state["summaries"] = {}
+ """Validate that the state contains required fields.
- if "halt" not in state:
- state["halt"] = False
-
- if "token_usage" not in state:
- state["token_usage"] = {}
- elif state["token_usage"] is None:
- state["token_usage"] = {}
+ Args:
+ state: Current workflow state
+ Returns:
+ True if state is valid, False otherwise
+ """
+ for field in self.required_fields:
+ if field not in state:
+ self.logger.error(
+ f"[ProposalEvaluation] Missing required field: {field}"
+ )
+ return False
return True
@@ -1425,337 +270,149 @@ async def evaluate_proposal(
proposal_data: str,
config: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
- """Evaluate a proposal using the hierarchical team workflow."""
- logger.info(f"[DEBUG:Workflow:{proposal_id}] Starting evaluation workflow")
-
- debug_level = 0
- if config and "debug_level" in config:
- debug_level = config.get("debug_level", 0)
- if debug_level > 0:
- logger.debug(f"[PROPOSAL_DEBUG] Using debug_level: {debug_level}")
-
- if not proposal_data:
- logger.warning(
- f"[PROPOSAL_DEBUG] proposal_data is empty or None! This will cause evaluation failure."
- )
+ """Evaluate a proposal using the ProposalEvaluationWorkflow.
+
+ Args:
+ proposal_id: Unique identifier for the proposal
+ proposal_data: Proposal content
+ config: Optional configuration for the workflow
+
+ Returns:
+ Dictionary containing evaluation results
+ """
+ # Set up configuration with defaults if not provided
+ if config is None:
+ config = {}
- state = {
+ # Use model name from config or default
+ model_name = config.get("model_name", "gpt-4.1")
+
+ workflow = ProposalEvaluationWorkflow(config)
+
+ # Create initial state
+ initial_state = {
"proposal_id": proposal_id,
"proposal_data": proposal_data,
"flags": [],
"summaries": {},
- "halt": False,
"token_usage": {},
- "core_score": None,
- "historical_score": None,
- "financial_score": None,
- "social_score": None,
- "final_score": None,
- "decision": None,
"core_agent_invocations": 0,
- "recursion_count": 0,
+ "halt": False,
}
+ # Run workflow
try:
- workflow = ProposalEvaluationWorkflow(config or {})
- logger.info(
- f"[DEBUG:Workflow:{proposal_id}] Executing hierarchical team workflow"
- )
- result = await workflow.execute(state)
+ logger.info(f"Starting proposal evaluation for proposal {proposal_id}")
+ result = await workflow.execute(initial_state)
+
+ # Add diagnostic logging
logger.info(
- f"[DEBUG:Workflow:{proposal_id}] Workflow execution completed with decision: {result.get('decision', 'Unknown')}"
+ f"[DEBUG:EXTRACT] Workflow execution complete, result keys: {list(result.keys())}"
)
-
- # Only output detailed debug info at higher debug levels
- if debug_level >= 2:
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] RESULT STRUCTURE: {list(result.keys())}"
- )
- logger.debug(f"[DEBUG:Workflow:{proposal_id}] RESULT SCORES TYPES:")
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] - Core: {type(result.get('core_score'))} = {repr(result.get('core_score'))[:100]+'...' if len(repr(result.get('core_score'))) > 100 else repr(result.get('core_score'))}"
- )
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] - Historical: {type(result.get('historical_score'))} = {repr(result.get('historical_score'))[:100]+'...' if len(repr(result.get('historical_score'))) > 100 else repr(result.get('historical_score'))}"
- )
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] - Financial: {type(result.get('financial_score'))} = {repr(result.get('financial_score'))[:100]+'...' if len(repr(result.get('financial_score'))) > 100 else repr(result.get('financial_score'))}"
- )
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] - Social: {type(result.get('social_score'))} = {repr(result.get('social_score'))[:100]+'...' if len(repr(result.get('social_score'))) > 100 else repr(result.get('social_score'))}"
- )
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] - Final: {type(result.get('final_score'))} = {repr(result.get('final_score'))[:100]+'...' if len(repr(result.get('final_score'))) > 100 else repr(result.get('final_score'))}"
+ logger.info(f"[DEBUG:EXTRACT] final_score in result: {'final_score' in result}")
+ if "final_score" in result:
+ logger.info(
+ f"[DEBUG:EXTRACT] final_score type: {type(result['final_score'])}"
)
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] - Decision: {type(result.get('decision'))} = {repr(result.get('decision'))}"
- )
-
- if result is None:
- logger.error(
- f"[DEBUG:Workflow:{proposal_id}] Workflow returned None result, using default values"
- )
- return {
- "proposal_id": proposal_id,
- "score": 0,
- "decision": "Error",
- "explanation": "Evaluation failed: Workflow returned empty result",
- "component_scores": {
- "core": 0,
- "historical": 0,
- "financial": 0,
- "social": 0,
- },
- "flags": ["Workflow error: Empty result"],
- "token_usage": {},
- }
+ logger.info(f"[DEBUG:EXTRACT] final_score content: {result['final_score']}")
+ # Extract results
def safe_extract_score(value, default=0):
+ """Safely extract a score from a potentially complex structure."""
if isinstance(value, dict) and "score" in value:
- return value.get("score", default)
- elif isinstance(value, int):
- return value
- elif isinstance(value, str):
- try:
- return int(value)
- except ValueError:
- pass # If string is not int, will fall through to default
+ return value["score"]
return default
- final_score_val = result.get("final_score")
- final_score_dict = {}
- if isinstance(final_score_val, dict):
- final_score_dict = final_score_val
-
- component_scores = {
- "core": safe_extract_score(result.get("core_score")),
- "historical": safe_extract_score(result.get("historical_score")),
- "financial": safe_extract_score(result.get("financial_score")),
- "social": safe_extract_score(result.get("social_score")),
+ # Get all scores for reporting
+ core_score = safe_extract_score(result.get("core_score"))
+ historical_score = safe_extract_score(result.get("historical_score"))
+ financial_score = safe_extract_score(result.get("financial_score"))
+ social_score = safe_extract_score(result.get("social_score"))
+ final_score = safe_extract_score(result.get("final_score"))
+
+ # Get decision
+ final_decision = "Undecided"
+ final_explanation = "No final decision was reached."
+
+ if isinstance(result.get("final_score"), dict):
+ final_decision = result["final_score"].get("decision", "Undecided")
+ final_explanation = result["final_score"].get(
+ "explanation", "No explanation provided."
+ )
+
+ # Determine approval and confidence
+ approval = final_decision.lower() == "approve"
+ confidence = 0.7 # Default confidence
+
+ if (
+ isinstance(result.get("final_score"), dict)
+ and "confidence" in result["final_score"]
+ ):
+ confidence = result["final_score"]["confidence"]
+
+ # Compile token usage
+ token_usage = result.get("token_usage", {})
+ total_token_usage = {
+ "input_tokens": 0,
+ "output_tokens": 0,
+ "total_tokens": 0,
}
- # This is a useful log to keep even at lower debug levels
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] EXTRACTED COMPONENT SCORES: {component_scores}"
- )
-
- explanation = ""
- if isinstance(final_score_dict, dict) and "explanation" in final_score_dict:
- explanation = final_score_dict.get("explanation", "")
- elif isinstance(final_score_val, str):
- explanation = final_score_val
-
- # Log the explanation to help debug
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] Explanation extracted: {explanation[:100]}..."
- )
-
- final_score = 0
- if isinstance(final_score_dict, dict) and "score" in final_score_dict:
- final_score = final_score_dict.get("score", 0)
- else:
- final_score = safe_extract_score(final_score_val)
-
- decision = result.get("decision")
- if decision is None:
- if isinstance(final_score_dict, dict) and "decision" in final_score_dict:
- decision = final_score_dict.get("decision")
- else:
- decision = "Reject"
-
- logger.debug(
- f"[DEBUG:Workflow:{proposal_id}] Final decision: {decision}, score: {final_score}"
- )
-
- total_token_usage = result.get("token_usage", {})
- total_input_tokens = 0
- total_output_tokens = 0
- total_tokens = 0
-
- # Aggregate tokens from all agent steps
- # Assuming model_name is consistent across all steps for this aggregation, or we use the primary model_name
- # If each agent could use a different model, this would need more detailed per-model tracking
- logger.debug(f"Token usage entries in result: {list(total_token_usage.keys())}")
- for agent_key, usage_data in total_token_usage.items():
- if isinstance(usage_data, dict):
- total_input_tokens += usage_data.get("input_tokens", 0)
- total_output_tokens += usage_data.get("output_tokens", 0)
- total_tokens += usage_data.get("total_tokens", 0)
- logger.debug(f"Token usage for {agent_key}: {usage_data}")
- else:
- logger.warning(
- f"Unexpected format for token_usage data for agent {agent_key}: {usage_data}"
- )
+ for agent_usage in token_usage.values():
+ if not isinstance(agent_usage, dict):
+ continue  # skip malformed per-agent entries
+ total_token_usage["input_tokens"] += agent_usage.get("input_tokens", 0)
+ total_token_usage["output_tokens"] += agent_usage.get("output_tokens", 0)
+ total_token_usage["total_tokens"] += agent_usage.get("total_tokens", 0)
- # Extract component summaries for detailed reporting
- component_summaries = {}
- if isinstance(result.get("summaries"), dict):
- component_summaries = result.get("summaries")
-
- # Extract and aggregate flags
- all_flags = result.get("flags", [])
- if not isinstance(all_flags, list):
- all_flags = []
-
- # Placeholder for web search specific token usage if it were tracked separately
- # In the original, these seemed to be fixed placeholders.
- web_search_input_tokens = 0
- web_search_output_tokens = 0
- web_search_total_tokens = 0
-
- # Initialize total token usage by model
- total_token_usage_by_model = {}
-
- # Extract token usage by model from token_usage data
- for agent_name, agent_usage in total_token_usage.items():
- if isinstance(agent_usage, dict) and agent_usage.get("total_tokens", 0) > 0:
- # Get model name from config, or use default
- model_name = config.get(
- "model_name", "gpt-4.1"
- ) # Use configured model name
-
- # Extract model name from each agent usage if available
- # This would require each agent to include model info in their token usage
- if "model_name" in agent_usage:
- model_name = agent_usage["model_name"]
-
- # Initialize the model entry if needed
- if model_name not in total_token_usage_by_model:
- total_token_usage_by_model[model_name] = {
- "input_tokens": 0,
- "output_tokens": 0,
- "total_tokens": 0,
- }
-
- # Add token usage for this agent to the model's tally
- total_token_usage_by_model[model_name][
- "input_tokens"
- ] += agent_usage.get("input_tokens", 0)
- total_token_usage_by_model[model_name][
- "output_tokens"
- ] += agent_usage.get("output_tokens", 0)
- total_token_usage_by_model[model_name][
- "total_tokens"
- ] += agent_usage.get("total_tokens", 0)
-
- # Fallback if no token usage was recorded
- if not total_token_usage_by_model:
- total_token_usage_by_model["gpt-4.1"] = {
- "input_tokens": total_input_tokens,
- "output_tokens": total_output_tokens,
- "total_tokens": total_tokens,
- }
-
- # Improved cost calculation by model
- cost_per_thousand = {
- "gpt-4.1": 0.01, # $0.01 per 1K tokens
- "gpt-4.1-mini": 0.005, # $0.005 per 1K tokens
- "gpt-4.1-32k": 0.03, # $0.03 per 1K tokens
- "gpt-4": 0.03, # $0.03 per 1K tokens
- "gpt-4-32k": 0.06, # $0.06 per 1K tokens
- "gpt-3.5-turbo": 0.0015, # $0.0015 per 1K tokens
- "default": 0.01, # default fallback
- }
-
- # Calculate costs for each model
- total_cost_by_model = {}
- total_overall_cost = 0.0
- for model_name, usage in total_token_usage_by_model.items():
- # Get cost per 1K tokens for this model
- model_cost_per_k = cost_per_thousand.get(
- model_name, cost_per_thousand["default"]
- )
- # Calculate cost for this model's usage
- model_cost = usage["total_tokens"] * (model_cost_per_k / 1000)
- total_cost_by_model[model_name] = model_cost
- total_overall_cost += model_cost
-
- if not total_cost_by_model:
- # Fallback if no models were recorded
- model_name = "gpt-4.1" # Default model name
- total_cost_by_model[model_name] = total_tokens * (
- cost_per_thousand["default"] / 1000
- )
- total_overall_cost = total_cost_by_model[model_name]
-
- final_result = {
- "success": True,
- "evaluation": {
- "approve": decision == "Approve",
- "confidence_score": final_score / 100.0 if final_score else 0.0,
- "reasoning": explanation,
- },
- "decision": decision,
- "score": final_score,
- "explanation": explanation,
- "component_scores": component_scores,
- "component_summaries": component_summaries, # Include component summaries
- "flags": all_flags,
- "token_usage": total_token_usage, # Include all token usage details
- "web_search_results": [],
- "treasury_balance": None,
- "web_search_token_usage": {
- "input_tokens": web_search_input_tokens,
- "output_tokens": web_search_output_tokens,
- "total_tokens": web_search_total_tokens,
- },
- "evaluation_token_usage": {
- "input_tokens": total_input_tokens,
- "output_tokens": total_output_tokens,
- "total_tokens": total_tokens,
- },
- "evaluation_model_info": {
- "name": config.get("model_name", "gpt-4.1"),
- "temperature": config.get("temperature", 0.1),
- },
- "web_search_model_info": {
- "name": config.get("model_name", "gpt-4.1"),
- "temperature": config.get("temperature", 0.1),
+ # Return formatted result
+ evaluation_result = {
+ "proposal_id": proposal_id,
+ "approve": approval,
+ "confidence_score": confidence,
+ "reasoning": final_explanation,
+ "scores": {
+ "core": core_score,
+ "historical": historical_score,
+ "financial": financial_score,
+ "social": social_score,
+ "final": final_score,
},
- "total_token_usage_by_model": total_token_usage_by_model,
- "total_cost_by_model": total_cost_by_model,
- "total_overall_cost": total_overall_cost,
+ "flags": result.get("flags", []),
+ "summaries": result.get("summaries", {}),
+ "token_usage": total_token_usage,
+ "model_name": model_name,
}
- logger.debug(
- f"Proposal evaluation completed: Success={final_result['success']} | Decision={'APPROVE' if decision == 'Approve' else 'REJECT'} | Confidence={final_result['evaluation']['confidence_score']:.2f} | Auto-voted={decision == 'Approve'}"
+ logger.info(
+ f"Completed proposal evaluation for proposal {proposal_id}: {final_decision}"
)
- return final_result
+ return evaluation_result
+
except Exception as e:
- logger.error(f"Error in workflow execution: {str(e)}", exc_info=True)
+ logger.error(f"Error in proposal evaluation: {str(e)}")
return {
"proposal_id": proposal_id,
- "score": 0,
- "decision": "Error",
- "explanation": f"Evaluation failed: {str(e)}",
- "component_scores": {
- "core": 0,
- "historical": 0,
- "financial": 0,
- "social": 0,
- },
- "flags": [f"Workflow error: {str(e)}"],
- "token_usage": {},
+ "approve": False,
+ "confidence_score": 0.1,
+ "reasoning": f"Evaluation failed due to error: {str(e)}",
+ "error": str(e),
}
def get_proposal_evaluation_tools(
profile: Optional[Profile] = None, agent_id: Optional[UUID] = None
):
- """Get the tools needed for proposal evaluation."""
- all_tools = initialize_tools(profile=profile, agent_id=agent_id)
- logger.debug(f"Available tools: {', '.join(all_tools.keys())}")
- required_tools = [
- "dao_action_get_proposal",
- "dao_action_vote_on_proposal",
- "dao_action_get_voting_power",
- "dao_action_get_voting_configuration",
- "database_get_dao_get_by_name",
- "dao_search",
- ]
- filtered_tools = filter_tools_by_names(required_tools, all_tools)
- logger.debug(f"Using tools: {', '.join(filtered_tools.keys())}")
- return filtered_tools
+ """Get tools for proposal evaluation.
+
+ Args:
+ profile: Optional user profile
+ agent_id: Optional agent ID
+
+ Returns:
+ Tools filtered to those required for proposal voting
+ """
+ tool_names = ["vote_on_action_proposal"]
+ tools = initialize_tools(profile, agent_id)
+ return filter_tools_by_names(tool_names, tools)
async def evaluate_and_vote_on_proposal(
@@ -1767,283 +424,133 @@ async def evaluate_and_vote_on_proposal(
dao_id: Optional[UUID] = None,
debug_level: int = 0, # 0=normal, 1=verbose, 2=very verbose
) -> Dict:
- """Evaluate a proposal and automatically vote based on the evaluation."""
- logger.debug(
- f"Starting proposal evaluation: proposal_id={proposal_id} | auto_vote={auto_vote} | confidence_threshold={confidence_threshold} | debug_level={debug_level}"
- )
- try:
- effective_agent_id = agent_id
- if not effective_agent_id and wallet_id:
- wallet = backend.get_wallet(wallet_id)
- if wallet and wallet.agent_id:
- effective_agent_id = wallet.agent_id
- logger.debug(
- f"Using agent ID {effective_agent_id} from wallet {wallet_id}"
- )
-
- model_name = "gpt-4.1"
- temperature = 0.1
- if effective_agent_id:
- try:
- prompts = backend.list_prompts(
- PromptFilter(
- agent_id=effective_agent_id,
- dao_id=dao_id,
- is_active=True,
- limit=1,
- )
- )
- if prompts:
- first_prompt = prompts[0]
- model_name = first_prompt.model or model_name
- temperature = (
- first_prompt.temperature
- if first_prompt.temperature is not None
- else temperature
- )
- logger.debug(
- f"Using model settings from agent {effective_agent_id}: {model_name} (temp={temperature})"
- )
- else:
- logger.warning(
- f"No active prompts found for agent {effective_agent_id}."
- )
- except Exception as e:
- logger.error(
- f"Failed to get agent prompt settings: {str(e)}", exc_info=True
- )
+ """Evaluate a proposal and optionally vote on it.
+
+ Args:
+ proposal_id: Proposal ID
+ wallet_id: Optional wallet ID
+ agent_id: Optional agent ID
+ auto_vote: Whether to automatically vote based on evaluation
+ confidence_threshold: Confidence threshold for auto-voting
+ dao_id: Optional DAO ID
+ debug_level: Debug level (0=normal, 1=verbose, 2=very verbose)
+
+ Returns:
+ Evaluation and voting results
+ """
+ # Get proposal details
+ logger.info(f"Retrieving proposal details for {proposal_id}")
- logger.debug(
- f"[PROPOSAL_DEBUG] Fetching proposal data from backend for ID: {proposal_id}"
- )
- proposal_data = backend.get_proposal(proposal_id)
- if not proposal_data:
- logger.error(
- f"[PROPOSAL_DEBUG] No proposal data found for ID: {proposal_id}"
- )
- raise ValueError(f"Proposal {proposal_id} not found")
-
- logger.debug(f"[PROPOSAL_DEBUG] Raw proposal data: {proposal_data}")
+ try:
+ proposal = backend.get_proposal(proposal_id=proposal_id)
- proposal_content = proposal_data.parameters or ""
- if not proposal_content:
- logger.warning(f"[PROPOSAL_DEBUG] Proposal parameters/content is empty!")
+ if not proposal:
+ logger.error(f"Proposal {proposal_id} not found")
+ return {"error": f"Proposal {proposal_id} not found"}
+ # Set up config based on debug level
config = {
- "model_name": model_name,
- "temperature": temperature,
- "mission_collection": "knowledge_collection",
- "proposals_collection": "proposals",
- "enable_web_search": True,
- "planning_model": "gpt-4.1-mini",
+ "debug_level": debug_level,
}
- if debug_level > 0:
- config["debug_level"] = debug_level
- logger.debug(f"[PROPOSAL_DEBUG] Setting debug_level to {debug_level}")
-
- if not dao_id and proposal_data.dao_id:
- dao_id = proposal_data.dao_id
- dao_info = None
- if dao_id:
- dao_info = backend.get_dao(dao_id)
- if dao_info:
- config["dao_mission"] = dao_info.mission
-
- treasury_balance = None
- try:
- if dao_id:
- treasury_extensions = backend.list_extensions(
- ExtensionFilter(dao_id=dao_id, type="EXTENSIONS_TREASURY")
- )
- if treasury_extensions:
- hiro_api = HiroApi()
- treasury_balance = hiro_api.get_address_balance(
- treasury_extensions[0].contract_principal
- )
- except Exception as e:
- logger.error(f"Failed to get treasury balance: {str(e)}", exc_info=True)
-
- logger.debug("Starting hierarchical evaluation workflow...")
- eval_result = await evaluate_proposal(
+ if debug_level >= 1:
+ # For verbose debugging, customize agent settings
+ config["approval_threshold"] = 70
+ config["veto_threshold"] = 30
+ config["consensus_threshold"] = 10
+
+ # Evaluate the proposal
+ logger.info(f"Starting evaluation of proposal {proposal_id}")
+ evaluation_result = await evaluate_proposal(
proposal_id=str(proposal_id),
- proposal_data=proposal_data.parameters,
+ proposal_data=proposal.parameters,
config=config,
)
- decision = eval_result.get("decision")
- if decision is None:
- decision = "Reject"
- logger.warning(
- f"No decision found in evaluation results, defaulting to '{decision}'"
- )
-
- score = eval_result.get("score", 0)
- confidence_score = score / 100.0 if score else 0.0
-
- approve = False
- if isinstance(decision, str) and decision.lower() == "approve":
- approve = True
-
- should_vote = auto_vote and confidence_score >= confidence_threshold
-
- vote_result = None
- tx_id = None
- if should_vote and wallet_id:
- try:
- vote_tool = VoteOnActionProposalTool(wallet_id=wallet_id)
- if proposal_data.type == ProposalType.ACTION:
- contract_info = proposal_data.contract_principal
- if "." in contract_info:
- parts = contract_info.split(".")
- if len(parts) >= 2:
- action_proposals_contract = parts[0]
- action_proposals_voting_extension = parts[1]
- result = await vote_tool.vote_on_proposal(
- contract_principal=action_proposals_contract,
- extension_name=action_proposals_voting_extension,
- proposal_id=proposal_data.proposal_id,
- vote=approve,
- )
- vote_result = {
- "success": result is not None,
- "output": result,
- }
- if (
- result
- and isinstance(result, str)
- and "txid:" in result.lower()
- ):
- for line in result.split("\n"):
- if "txid:" in line.lower():
- parts = line.split(":")
- if len(parts) > 1:
- tx_id = parts[1].strip()
- break
- else:
- logger.warning(
- f"Invalid contract principal format: {contract_info}"
- )
- else:
- logger.warning(
- f"Cannot vote on non-action proposal type: {proposal_data.type}"
- )
- except Exception as e:
- logger.error(f"Error executing vote: {str(e)}", exc_info=True)
- vote_result = {
- "success": False,
- "error": f"Error during voting: {str(e)}",
+ # Check if auto voting is enabled
+ if auto_vote:
+ if "error" in evaluation_result:
+ logger.error(
+ f"Skipping voting due to evaluation error: {evaluation_result['error']}"
+ )
+ return {
+ "evaluation": evaluation_result,
+ "vote_result": None,
+ "message": "Skipped voting due to evaluation error",
}
- elif not should_vote:
- vote_result = {
- "success": True,
- "message": "Voting skipped due to confidence threshold or auto_vote setting",
- "data": None,
- }
- # Get token usage data from eval_result
- total_token_usage = eval_result.get("token_usage", {})
- total_input_tokens = 0
- total_output_tokens = 0
- total_tokens = 0
-
- # Aggregate tokens from all agent steps - no need to log duplicates here
- for agent_key, usage_data in total_token_usage.items():
- if isinstance(usage_data, dict):
- total_input_tokens += usage_data.get("input_tokens", 0)
- total_output_tokens += usage_data.get("output_tokens", 0)
- total_tokens += usage_data.get("total_tokens", 0)
-
- # Initialize total_token_usage_by_model using data from eval_result
- total_token_usage_by_model = eval_result.get("total_token_usage_by_model", {})
- if not total_token_usage_by_model:
- # Use the default model name from settings or default to gpt-4.1
- default_model = model_name or "gpt-4.1"
- # Add total token counts to the model
- total_token_usage_by_model[default_model] = {
- "input_tokens": total_input_tokens,
- "output_tokens": total_output_tokens,
- "total_tokens": total_tokens,
- }
+ # Check if the confidence score meets the threshold
+ confidence_score = evaluation_result.get("confidence_score", 0)
- # Get cost calculations from eval_result if available
- total_cost_by_model = eval_result.get("total_cost_by_model", {})
- total_overall_cost = eval_result.get("total_overall_cost", 0.0)
-
- # If cost data is missing, calculate it
- if not total_cost_by_model:
- # Improved cost calculation by model
- cost_per_thousand = {
- "gpt-4.1": 0.01, # $0.01 per 1K tokens
- "gpt-4.1-mini": 0.005, # $0.005 per 1K tokens
- "gpt-4.1-32k": 0.03, # $0.03 per 1K tokens
- "gpt-4": 0.03, # $0.03 per 1K tokens
- "gpt-4-32k": 0.06, # $0.06 per 1K tokens
- "gpt-3.5-turbo": 0.0015, # $0.0015 per 1K tokens
- "default": 0.01, # default fallback
- }
+ if confidence_score >= confidence_threshold:
+ # Get the vote decision
+ approve = evaluation_result.get("approve", False)
+ vote_direction = "for" if approve else "against"
- # Calculate costs for each model
- total_cost_by_model = {}
- total_overall_cost = 0.0
- for model_key, usage in total_token_usage_by_model.items():
- # Get cost per 1K tokens for this model
- model_cost_per_k = cost_per_thousand.get(
- model_key, cost_per_thousand["default"]
+ logger.info(
+ f"Auto-voting {vote_direction} proposal {proposal_id} with confidence {confidence_score}"
)
- # Calculate cost for this model's usage
- model_cost = usage["total_tokens"] * (model_cost_per_k / 1000)
- total_cost_by_model[model_key] = model_cost
- total_overall_cost += model_cost
-
- # Construct final result with voting information added
- final_result = {
- "success": True,
- "evaluation": {
- "approve": approve,
- "confidence_score": confidence_score,
- "reasoning": eval_result.get("explanation", ""),
- },
- "vote_result": vote_result,
- "auto_voted": should_vote,
- "tx_id": tx_id,
- "vector_results": [],
- "recent_tweets": [],
- "web_search_results": eval_result.get("web_search_results", []),
- "treasury_balance": treasury_balance,
- "component_scores": eval_result.get("component_scores", {}),
- "component_summaries": eval_result.get("component_summaries", {}),
- "flags": eval_result.get("flags", []),
- "token_usage": total_token_usage,
- "web_search_token_usage": eval_result.get(
- "web_search_token_usage",
- {
- "input_tokens": 0,
- "output_tokens": 0,
- "total_tokens": 0,
- },
- ),
- "evaluation_token_usage": {
- "input_tokens": total_input_tokens,
- "output_tokens": total_output_tokens,
- "total_tokens": total_tokens,
- },
- "evaluation_model_info": {"name": model_name, "temperature": temperature},
- "web_search_model_info": {"name": model_name, "temperature": temperature},
- "total_token_usage_by_model": total_token_usage_by_model,
- "total_cost_by_model": total_cost_by_model,
- "total_overall_cost": total_overall_cost,
- }
- # Single log entry about the final result instead of duplicating token usage logs
- logger.debug(
- f"Proposal evaluation completed with voting: Decision={'APPROVE' if approve else 'REJECT'} | Confidence={confidence_score:.2f} | Auto-voted={should_vote} | Transaction={tx_id or 'None'}"
- )
- return final_result
+ # Get the voting tool
+ profile = await backend.get_profile(
+ wallet_id=wallet_id, agent_id=agent_id
+ )
+ tools = get_proposal_evaluation_tools(profile, agent_id)
+ vote_tool = next(
+ (t for t in tools if isinstance(t, VoteOnActionProposalTool)), None
+ )
+
+ if vote_tool:
+ try:
+ # Execute the vote
+ vote_result = await vote_tool.execute(
+ proposal_id=str(proposal_id),
+ vote=vote_direction,
+ wallet_id=str(wallet_id) if wallet_id else None,
+ dao_id=str(dao_id) if dao_id else None,
+ )
+
+ logger.info(f"Vote result: {vote_result}")
+
+ return {
+ "evaluation": evaluation_result,
+ "vote_result": vote_result,
+ "message": f"Voted {vote_direction} with confidence {confidence_score:.2f}",
+ }
+ except Exception as e:
+ logger.error(f"Error voting on proposal: {str(e)}")
+ return {
+ "evaluation": evaluation_result,
+ "vote_result": None,
+ "error": f"Error voting on proposal: {str(e)}",
+ }
+ else:
+ logger.error("Vote tool not available")
+ return {
+ "evaluation": evaluation_result,
+ "vote_result": None,
+ "error": "Vote tool not available",
+ }
+ else:
+ logger.info(
+ f"Skipping auto-vote due to low confidence: {confidence_score} < {confidence_threshold}"
+ )
+ return {
+ "evaluation": evaluation_result,
+ "vote_result": None,
+ "message": f"Skipped voting due to low confidence: {confidence_score:.2f} < {confidence_threshold}",
+ }
+ else:
+ logger.info(f"Auto-voting disabled, returning evaluation only")
+ return {
+ "evaluation": evaluation_result,
+ "vote_result": None,
+ "message": "Auto-voting disabled",
+ }
+
except Exception as e:
- error_msg = f"Unexpected error in evaluate_and_vote_on_proposal: {str(e)}"
- logger.error(error_msg, exc_info=True)
- return {"success": False, "error": error_msg}
+ logger.error(f"Error in evaluate_and_vote_on_proposal: {str(e)}")
+ return {"error": f"Failed to evaluate proposal: {str(e)}"}
async def evaluate_proposal_only(
@@ -2052,30 +559,22 @@ async def evaluate_proposal_only(
agent_id: Optional[UUID] = None,
dao_id: Optional[UUID] = None,
) -> Dict:
- """Evaluate a proposal without voting."""
- logger.debug(f"Starting proposal-only evaluation: proposal_id={proposal_id}")
- effective_agent_id = agent_id
- if not effective_agent_id and wallet_id:
- wallet = backend.get_wallet(wallet_id)
- if wallet and wallet.agent_id:
- effective_agent_id = wallet.agent_id
-
- result = await evaluate_and_vote_on_proposal(
+ """Evaluate a proposal without voting.
+
+ Args:
+ proposal_id: Proposal ID
+ wallet_id: Optional wallet ID
+ agent_id: Optional agent ID
+ dao_id: Optional DAO ID
+
+ Returns:
+ Evaluation results
+ """
+ # Delegate to evaluate_and_vote_on_proposal with auto_vote=False
+ return await evaluate_and_vote_on_proposal(
proposal_id=proposal_id,
wallet_id=wallet_id,
- agent_id=effective_agent_id,
- dao_id=dao_id,
+ agent_id=agent_id,
auto_vote=False,
+ dao_id=dao_id,
)
-
- # Simplified logging - no need to duplicate what evaluate_and_vote_on_proposal already logged
- logger.debug("Removing vote-related fields from response")
- if "vote_result" in result:
- del result["vote_result"]
- if "auto_voted" in result:
- del result["auto_voted"]
- if "tx_id" in result:
- del result["tx_id"]
-
- logger.debug("Proposal-only evaluation completed")
- return result
diff --git a/services/workflows/utils/__init__.py b/services/workflows/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/services/workflows/utils/models.py b/services/workflows/utils/models.py
new file mode 100644
index 00000000..0fc55b73
--- /dev/null
+++ b/services/workflows/utils/models.py
@@ -0,0 +1,31 @@
+from typing import List
+
+from pydantic import BaseModel, Field
+
+
+class AgentOutput(BaseModel):
+ """Output model for agent evaluations."""
+
+ score: int = Field(description="Score from 0-100")
+ flags: List[str] = Field(description="Critical issues flagged")
+ summary: str = Field(description="Summary of findings")
+
+
+class FinalOutput(BaseModel):
+ """Output model for the final evaluation decision."""
+
+ score: int = Field(description="Final evaluation score")
+ decision: str = Field(description="Approve or Reject")
+ explanation: str = Field(description="Reasoning for decision")
+
+
+class ProposalEvaluationOutput(BaseModel):
+ """Output model for proposal evaluation."""
+
+ approve: bool = Field(
+ description="Decision: true to approve (vote FOR), false to reject (vote AGAINST)"
+ )
+ confidence_score: float = Field(
+ description="Confidence score for the decision (0.0-1.0)"
+ )
+ reasoning: str = Field(description="The reasoning behind the evaluation decision")
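+
+# Illustrative note (not part of the runtime path): these models are meant to
+# serve as structured-output schemas, e.g.
+#   llm.with_structured_output(FinalOutput).ainvoke([prompt_text])
+# which mirrors how the reasoning agent consumes FinalOutput in this patch.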
diff --git a/services/workflows/utils/state_reducers.py b/services/workflows/utils/state_reducers.py
new file mode 100644
index 00000000..390dca13
--- /dev/null
+++ b/services/workflows/utils/state_reducers.py
@@ -0,0 +1,139 @@
+from typing import Any, Dict, List, Optional
+
+from lib.logger import configure_logger
+
+logger = configure_logger(__name__)
+
+
+def no_update_reducer(current: Any, new: List[Any]) -> Any:
+ """Reducer that prevents updates after the initial value is set.
+
+ Args:
+ current: The current value
+ new: List of new values to consider
+
+ Returns:
+ The original value if set, otherwise the first non-None value from new
+ """
+ # Treat an initial empty string as unset (like None) so the first real value can be accepted
+ is_initial_empty_string = isinstance(current, str) and current == ""
+
+ # If current is genuinely set (not None and not initial empty string), keep it.
+ if current is not None and not is_initial_empty_string:
+ return current
+
+ # Current is None or an initial empty string. Try to set it from new.
+ processed_new_values = (
+ new if isinstance(new, list) else [new]
+ ) # Ensure 'new' is a list
+ for n_val in processed_new_values:
+ if n_val is not None:
+ return n_val
+
+ # If current was None/initial empty string and new is all None or empty, return current
+ return current
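+
+# Minimal behavior sketch (values are made up for illustration): once a value
+# is set, later writes through this reducer are ignored.
+#   no_update_reducer("proposal-123", ["other-id"])  -> "proposal-123"
+#   no_update_reducer(None, [None, "first-value"])   -> "first-value"
+#   no_update_reducer("", ["seeded"])                -> "seeded"  (empty string counts as unset)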
+
+
+def merge_dicts(current: Optional[Dict], updates: List[Optional[Dict]]) -> Dict:
+ """Merge multiple dictionary updates into the current dictionary.
+
+ Args:
+ current: The current dictionary (or None)
+ updates: List of dictionaries to merge in
+
+ Returns:
+ The merged dictionary
+ """
+ # Initialize current if it's None
+ if current is None:
+ current = {}
+
+ # Handle case where updates is None
+ if updates is None:
+ return current
+
+ # Process updates if it's a list
+ if isinstance(updates, list):
+ for update in updates:
+ if update and isinstance(update, dict):
+ current.update(update)
+ # Handle case where updates is a single dictionary, not a list
+ elif isinstance(updates, dict):
+ current.update(updates)
+
+ return current
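+
+# Merge behavior sketch (inputs assumed for illustration): later updates win
+# on key collisions, and a bare dict is accepted in place of a list of dicts.
+#   merge_dicts({"a": 1}, [{"b": 2}, {"a": 3}])  -> {"a": 3, "b": 2}
+#   merge_dicts(None, {"a": 1})                  -> {"a": 1}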
+
+
+def set_once(current: Any, updates: List[Any]) -> Any:
+ """Set the value once and prevent further updates.
+
+ Args:
+ current: The current value
+ updates: List of potential new values
+
+ Returns:
+ The current value if set, otherwise the first non-None value from updates
+ """
+ # If current already has a value, return it unchanged
+ if current is not None:
+ return current
+
+ # Handle case where updates is None instead of a list
+ if updates is None:
+ return None
+
+ # Process updates if it's a list
+ if isinstance(updates, list):
+ for update in updates:
+ if update is not None:
+ return update
+ # Handle case where updates is a single value, not a list
+ elif updates is not None:
+ return updates
+
+ return current
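+
+# Behaves like no_update_reducer but without the empty-string special case
+# (illustrative values):
+#   set_once(None, [None, 42])  -> 42
+#   set_once(42, [99])          -> 42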
+
+
+def update_state_with_agent_result(
+ state: Dict[str, Any], agent_result: Dict[str, Any], agent_name: str
+) -> Dict[str, Any]:
+ """Update state with agent result including summaries and flags.
+
+ Args:
+ state: The current state dictionary
+ agent_result: The result dictionary from an agent
+ agent_name: The name of the agent (e.g., 'core', 'historical')
+
+ Returns:
+ The updated state dictionary
+ """
+ logger.debug(
+ f"[DEBUG:update_state:{agent_name}] Updating state with {agent_name}_score (score: {agent_result.get('score', 'N/A')})"
+ )
+
+ # Update agent score in state
+ if agent_name in ["core", "historical", "financial", "social", "final"]:
+ # Make a copy of agent_result to avoid modifying the original
+ score_dict = dict(agent_result)
+ # Don't pass token_usage through this path to avoid duplication
+ if "token_usage" in score_dict:
+ del score_dict["token_usage"]
+
+ # Directly assign the dictionary to the state key
+ state[f"{agent_name}_score"] = score_dict
+
+ # Update summaries
+ if "summaries" not in state:
+ state["summaries"] = {}
+
+ if "summary" in agent_result and agent_result["summary"]:
+ state["summaries"][f"{agent_name}_score"] = agent_result["summary"]
+
+ # Update flags
+ if "flags" not in state:
+ state["flags"] = []
+
+ if "flags" in agent_result and isinstance(agent_result["flags"], list):
+ state["flags"].extend(agent_result["flags"])
+
+ return state
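+
+# Hypothetical example of the resulting state shape after a core agent run:
+#   update_state_with_agent_result({}, {"score": 85, "summary": "ok", "flags": []}, "core")
+#   -> {"core_score": {"score": 85, "summary": "ok", "flags": []},
+#       "summaries": {"core_score": "ok"},
+#       "flags": []}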
diff --git a/services/workflows/utils/token_usage.py b/services/workflows/utils/token_usage.py
new file mode 100644
index 00000000..5a063213
--- /dev/null
+++ b/services/workflows/utils/token_usage.py
@@ -0,0 +1,64 @@
+from typing import Any, Dict
+
+from lib.logger import configure_logger
+from lib.utils import calculate_token_cost
+
+logger = configure_logger(__name__)
+
+
+class TokenUsageMixin:
+ """Mixin for tracking token usage in LLM calls."""
+
+ def __init__(self):
+ """Initialize token usage tracker."""
+ pass
+
+ def track_token_usage(self, prompt_text: str, result: Any) -> Dict[str, Any]:
+ """Track token usage for an LLM invocation.
+
+ Args:
+ prompt_text: The prompt text sent to the LLM
+ result: The response from the LLM
+
+ Returns:
+ Dictionary containing token usage information
+ """
+ token_usage_data = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+
+ # Try to extract token usage from LLM
+ if (
+ hasattr(self.llm, "_last_prompt_id")
+ and hasattr(self.llm, "client")
+ and hasattr(self.llm.client, "usage_by_prompt_id")
+ ):
+ last_prompt_id = self.llm._last_prompt_id
+ if last_prompt_id in self.llm.client.usage_by_prompt_id:
+ usage = self.llm.client.usage_by_prompt_id[last_prompt_id]
+ token_usage_data = {
+ "input_tokens": usage.get("prompt_tokens", 0),
+ "output_tokens": usage.get("completion_tokens", 0),
+ "total_tokens": usage.get("total_tokens", 0),
+ }
+ return token_usage_data
+
+ # Fallback to estimation
+ llm_model_name = getattr(self.llm, "model_name", "gpt-4.1")
+ token_count = len(prompt_text) // 4 # Simple estimation
+ token_usage_dict = {"input_tokens": token_count}
+ cost_result = calculate_token_cost(token_usage_dict, llm_model_name)
+ token_usage_data = {
+ "input_tokens": token_count,
+ "output_tokens": (
+ len(result.model_dump_json()) // 4
+ if hasattr(result, "model_dump_json")
+ else 0
+ ),
+ "total_tokens": token_count
+ + (
+ len(result.model_dump_json()) // 4
+ if hasattr(result, "model_dump_json")
+ else 0
+ ),
+ "model_name": llm_model_name,
+ }
+ return token_usage_data
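+
+# Hedged usage sketch: assumes the consuming class exposes `self.llm` (for
+# example a ChatOpenAI instance); the mixin does not create that attribute.
+#
+#   class ExampleAgent(TokenUsageMixin):
+#       def __init__(self, llm):
+#           self.llm = llm
+#
+#       async def run(self, prompt: str):
+#           result = await self.llm.ainvoke(prompt)
+#           return result, self.track_token_usage(prompt, result)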
diff --git a/services/workflows/web_search_mixin.py b/services/workflows/web_search_mixin.py
index f85692c4..8a8139e3 100644
--- a/services/workflows/web_search_mixin.py
+++ b/services/workflows/web_search_mixin.py
@@ -26,7 +26,7 @@ def _init_web_search(self) -> None:
if not hasattr(self, "client"):
self.client = OpenAI()
- async def search_web(
+ async def web_search(
self, query: str, **kwargs
) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
"""Search the web using OpenAI Responses API.
@@ -175,7 +175,7 @@ def integrate_with_graph(self, graph: StateGraph, **kwargs) -> None:
- user_location: dict with type, country, city, region
"""
# Add web search node
- graph.add_node("web_search", self.search_web)
+ graph.add_node("web_search", self.web_search)
# Add result processing node if needed
if "process_results" not in graph.nodes:
From cd61626b1563fe1bed3bc0136db91e9a3fd34a5a Mon Sep 17 00:00:00 2001
From: human058382928 <162091348+human058382928@users.noreply.github.com>
Date: Sat, 10 May 2025 18:26:26 -0700
Subject: [PATCH 5/5] improved evaluation agents
---
examples/proposal_evaluation_example.py | 20 +-
main.py | 8 +-
services/workflows/agents/core_context.py | 63 +++---
.../workflows/agents/financial_context.py | 111 +++++++----
.../workflows/agents/historical_context.py | 186 ++++++++++++++----
services/workflows/agents/reasoning.py | 66 ++++---
services/workflows/agents/social_context.py | 85 ++++----
services/workflows/chat.py | 2 +-
8 files changed, 366 insertions(+), 175 deletions(-)
diff --git a/examples/proposal_evaluation_example.py b/examples/proposal_evaluation_example.py
index 61cae05f..05f66fb1 100644
--- a/examples/proposal_evaluation_example.py
+++ b/examples/proposal_evaluation_example.py
@@ -40,12 +40,10 @@ async def create_test_proposal(dao_id: UUID) -> UUID:
Proposal Title: $FACES Airdrop to Bitcoin Faces Holders with Transparent Execution and Community Engagement
-Proposal ID: [Generate a new UUID for submission]
-
Proposer: Publius.btc
Proposal Data:
-I, Publius.btc, propose to execute a $FACES airdrop to Bitcoin Faces holders to boost community engagement and reward active participants in the DAO. The airdrop will distribute 10,000 $FACES tokens to eligible holders, with a clear execution plan, transparent verification, and measurable outcomes. The proposal aligns with the DAO’s mission to promote community activity and token utility. Below are the details:
+I, Publius.btc, propose to execute a $FACES airdrop to Bitcoin Faces holders to boost community engagement and reward active participants in the DAO. Due to a limit of 1,000 tokens per proposal, this will be 1 of 10 proposals, each distributing up to 1,000 $FACES tokens. The airdrop will distribute a total of 10,000 $FACES tokens to eligible holders, with a clear execution plan, transparent verification, and measurable outcomes. The proposal aligns with the DAO's mission to promote community activity and token utility. Below are the details:
Objective: Distribute $FACES tokens to Bitcoin Faces holders to incentivize participation, increase governance engagement, and strengthen community ties.
Eligibility Criteria:
@@ -54,7 +52,7 @@ async def create_test_proposal(dao_id: UUID) -> UUID:
Exclusion: Wallets flagged for suspicious activity (e.g., wash trading) based on on-chain analysis.
Execution Plan:
Snapshot: Conduct a blockchain snapshot of Bitcoin Faces holders on the specified date, using a third-party tool (e.g., Etherscan or equivalent for Bitcoin-based assets).
-Distribution: Distribute 10 $FACES per eligible wallet, up to a total of 10,000 tokens, via a smart contract to ensure transparency and immutability.
+Distribution: Distribute 10 $FACES per eligible wallet, up to a total of 1,000 tokens per proposal, via a smart contract to ensure transparency and immutability. This proposal is part of a series of 10 proposals to reach the full 10,000 token distribution.
Timeline:
Day 1–7: Proposal approval and snapshot preparation.
Day 8: Snapshot execution.
@@ -62,23 +60,23 @@ async def create_test_proposal(dao_id: UUID) -> UUID:
Day 15: Airdrop distribution.
Day 20: Post-airdrop report published.
Budget and Funding:
-Total Cost: 10,000 $FACES tokens (valued at $0.10 per token based on current market price, totaling $1,000).
+Total Cost: 1,000 $FACES tokens for this proposal (valued at $0.10 per token based on current market price, totaling $100). The full airdrop campaign will total 10,000 tokens across 10 proposals.
Additional Costs: $500 for smart contract development, auditing, and gas fees, to be funded from the DAO treasury.
-Funding Request: 10,000 $FACES tokens + $500 in stablecoins (e.g., USDC) from the DAO treasury.
+Funding Request: 1,000 $FACES tokens + $500 in stablecoins (e.g., USDC) from the DAO treasury for this proposal.
Cost Justification: The airdrop is cost-effective, targeting active holders to maximize engagement with minimal token dilution. The $500 covers secure execution to mitigate risks.
Verification and Transparency:
-Publish the snapshot data and eligible wallet list on the DAO’s governance forum.
+Publish the snapshot data and eligible wallet list on the DAO's governance forum.
Share the smart contract address and transaction hashes on-chain for public verification.
Provide a detailed post-airdrop report within 5 days of distribution, including the number of wallets reached, tokens distributed, and community feedback.
Community Benefit:
Inclusivity: All Bitcoin Faces holders are eligible, ensuring broad participation.
Engagement: The airdrop will encourage holders to participate in governance and DAO activities, addressing low governance participation.
-Stakeholder Consideration: The plan includes outreach to diverse community segments via the DAO’s social channels (e.g., Discord, X) to ensure awareness and feedback.
+Stakeholder Consideration: The plan includes outreach to diverse community segments via the DAO's social channels (e.g., Discord, X) to ensure awareness and feedback.
Alignment with DAO Priorities:
-Promotes token utility and community engagement, core to the DAO’s mission.
+Promotes token utility and community engagement, core to the DAO's mission.
Supports financial prudence by capping costs and providing ROI through increased governance participation (measurable via voting turnout post-airdrop).
Risk Mitigation:
-Financial Risk: Limited to 10,000 $FACES and $500, with no ongoing costs.
+Financial Risk: Limited to 1,000 $FACES and $500 for this proposal, with no ongoing costs. The full campaign is capped at 10,000 tokens and $5,000 across all proposals.
Execution Risk: Smart contract audit to prevent vulnerabilities.
Inclusion Risk: Transparent eligibility criteria to avoid disputes.
Deliverables and ROI:
@@ -89,7 +87,7 @@ async def create_test_proposal(dao_id: UUID) -> UUID:
Responds to feedback on inclusion by defining clear eligibility and outreach strategies.
Aligns with financial priorities by justifying costs and capping token usage.
Commitment:
-I will execute the airdrop as outlined, provide regular updates on the DAO’s governance forum, and deliver a comprehensive report with proof of distribution. If the proposal is approved, I will collaborate with the DAO’s technical and community teams to ensure success.
+I will execute the airdrop as outlined, provide regular updates on the DAO's governance forum, and deliver a comprehensive report with proof of distribution. If the proposal is approved, I will collaborate with the DAO's technical and community teams to ensure success.
"""
# # Convert parameters to JSON string and then hex encode it
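
The split-proposal budget quoted in the example text above can be sanity-checked with a few lines of arithmetic. The sketch below only restates figures that appear in the proposal (1,000 tokens per proposal at $0.10, $500 execution cost per proposal, 10 proposals in the series); it is an illustration, not part of the patch.

```python
# Illustrative sanity check of the airdrop budget quoted in the example proposal.
TOKENS_PER_PROPOSAL = 1_000   # per-proposal payout cap mentioned in the text
TOKEN_PRICE_USD = 0.10        # quoted market price per $FACES
EXECUTION_COST_USD = 500      # smart contract, audit, and gas per proposal
NUM_PROPOSALS = 10            # proposals in the series

per_proposal_token_value = TOKENS_PER_PROPOSAL * TOKEN_PRICE_USD   # $100, as stated
campaign_tokens = TOKENS_PER_PROPOSAL * NUM_PROPOSALS              # 10,000 $FACES total
campaign_execution_cost = EXECUTION_COST_USD * NUM_PROPOSALS       # $5,000 cap across proposals

print(per_proposal_token_value, campaign_tokens, campaign_execution_cost)
```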
diff --git a/main.py b/main.py
index b6d71f27..6a9f52c7 100644
--- a/main.py
+++ b/main.py
@@ -3,7 +3,7 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
-import api
+from api import chat, tools, webhooks
from config import config
from lib.logger import configure_logger
from services import startup
@@ -48,9 +48,9 @@ async def health_check():
# Load API routes
-app.include_router(api.tools.router)
-app.include_router(api.chat.router)
-app.include_router(api.webhooks.router)
+app.include_router(tools.router)
+app.include_router(chat.router)
+app.include_router(webhooks.router)
@app.on_event("startup")
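
The main.py change above assumes each module under api/ exposes a module-level `router` object (the diff confirms `tools.router`, `chat.router`, and `webhooks.router`). A minimal sketch of that layout, assuming a hypothetical route and prefix for illustration:

```python
# Hypothetical api/tools.py layout implied by `from api import chat, tools, webhooks`.
from fastapi import APIRouter, FastAPI

router = APIRouter(prefix="/tools")  # the prefix is an assumption for this sketch


@router.get("/ping")
def ping() -> dict:
    # Trivial route so the router has something to register.
    return {"status": "ok"}


app = FastAPI()
app.include_router(router)  # mirrors app.include_router(tools.router) in main.py
```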
diff --git a/services/workflows/agents/core_context.py b/services/workflows/agents/core_context.py
index bece9a93..4e010486 100644
--- a/services/workflows/agents/core_context.py
+++ b/services/workflows/agents/core_context.py
@@ -80,28 +80,47 @@ async def process(self, state: Dict[str, Any]) -> Dict[str, Any]:
prompt = PromptTemplate(
input_variables=["proposal_data", "dao_mission"],
- template="""Evaluate the proposal against the DAO's mission and values.
-
-# Context
-You are evaluating a proposal for a DAO that focuses on: {dao_mission}
-
-# Proposal Data
-{proposal_data}
-
-# Task
-Score this proposal from 0-100 based on:
-1. Alignment with DAO mission (40%)
-2. Clarity of proposal (20%)
-3. Feasibility and practicality (20%)
-4. Community benefit (20%)
-
-# Output Format
-Provide:
-- Score (0-100)
-- List of any critical issues or red flags
-- Brief summary of your evaluation
-
-Only return a JSON object with these three fields: score, flags (array), and summary.""",
+ template="""<instructions>
+ You are an agent - please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved.
+
+ If you are not sure about file content or codebase structure pertaining to the user's request, use your tools to read files and gather the relevant information: do NOT guess or make up an answer.
+
+ You MUST plan extensively before each function call, and reflect extensively on the outcomes of the previous function calls. DO NOT do this entire process by making function calls only, as this can impair your ability to solve the problem and think insightfully.
+ </instructions>
+
+ <dao_mission>
+ {dao_mission}
+ </dao_mission>
+
+ <proposal_data>
+ {proposal_data}
+ </proposal_data>
+
+ <evaluation_criteria>
+ Alignment with DAO mission
+ Clarity of proposal
+ Feasibility and practicality
+ Community benefit
+ </evaluation_criteria>
+
+ <scoring_guide>
+ Not aligned, unclear, impractical, or no community benefit
+ Significant issues or missing details
+ Adequate but with some concerns or minor risks
+ Good alignment, clear, practical, and beneficial
+ Excellent alignment, clarity, feasibility, and community value
+ </scoring_guide>
+
+ <output_format>
+ Provide:
+ A number from 0-100
+ List of any critical issues or red flags
+ Brief summary of your evaluation
+ Only return a JSON object with these three fields: score, flags (array), and summary.
+ </output_format>
+ """,
)
try:
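
For reference, the reworked template is consumed the same way as before: it is filled with the two input variables and wrapped in a HumanMessage carrying a single text part. A minimal sketch, assuming placeholder values (the real agent pulls these from workflow state):

```python
from langchain.prompts import PromptTemplate
from langchain_core.messages import HumanMessage

# Hypothetical stand-in values for illustration only.
dao_mission = "Promote community activity and token utility"
proposal_data = "Airdrop 1,000 $FACES to Bitcoin Faces holders..."

prompt = PromptTemplate(
    input_variables=["dao_mission", "proposal_data"],
    template="<dao_mission>{dao_mission}</dao_mission>\n<proposal_data>{proposal_data}</proposal_data>",
)
formatted = prompt.format(dao_mission=dao_mission, proposal_data=proposal_data)

# The agents in this patch send the formatted text as a single text content part.
message = HumanMessage(content=[{"type": "text", "text": formatted}])
```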
diff --git a/services/workflows/agents/financial_context.py b/services/workflows/agents/financial_context.py
index 1302b589..28e580bc 100644
--- a/services/workflows/agents/financial_context.py
+++ b/services/workflows/agents/financial_context.py
@@ -49,52 +49,83 @@ async def process(self, state: Dict[str, Any]) -> Dict[str, Any]:
funding_priorities = dao_financial_context.get("funding_priorities", [])
financial_constraints = dao_financial_context.get("financial_constraints", [])
- # Format financial context for the prompt
- financial_context_text = f"""
-Treasury Balance: {treasury_balance}
-Monthly Budget: {monthly_budget}
-Funding Priorities: {', '.join(funding_priorities) if funding_priorities else 'Not specified'}
-Financial Constraints: {', '.join(financial_constraints) if financial_constraints else 'Not specified'}
-"""
-
prompt = PromptTemplate(
- input_variables=["proposal_data", "financial_context"],
- template="""Evaluate the financial aspects of this proposal for the DAO.
-
-# Proposal
-{proposal_data}
-
-# DAO Financial Context
-{financial_context}
-
-# Task
-Score this proposal from 0-100 based on:
-1. Cost-effectiveness and value for money (40%)
-2. Budget accuracy and detail (20%)
-3. Financial risk assessment (20%)
-4. Alignment with DAO's financial priorities (20%)
-
-When analyzing, consider:
-- Is the proposal requesting a reasonable amount?
-- Are costs well-justified with clear deliverables?
-- Are there hidden or underestimated costs?
-- Does it align with the DAO's financial priorities?
-- What is the potential ROI (Return on Investment)?
-- Are there financial risks or dependencies?
-
-# Output Format
-Provide:
-- Score (0-100)
-- List of any critical financial issues or red flags
-- Brief summary of your financial evaluation
-
-Only return a JSON object with these three fields: score, flags (array), and summary.""",
+ input_variables=[
+ "proposal_data",
+ "treasury_balance",
+ "monthly_budget",
+ "funding_priorities",
+ "financial_constraints",
+ ],
+ template="""<instructions>
+ You are an agent - please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved.
+
+ If you are not sure about file content or codebase structure pertaining to the user's request, use your tools to read files and gather the relevant information: do NOT guess or make up an answer.
+
+ You MUST plan extensively before each function call, and reflect extensively on the outcomes of the previous function calls. DO NOT do this entire process by making function calls only, as this can impair your ability to solve the problem and think insightfully.
+ </instructions>
+
+ <proposal_data>
+ {proposal_data}
+ </proposal_data>
+
+ <dao_financial_context>
+ <treasury_balance>{treasury_balance}</treasury_balance>
+ <monthly_budget>{monthly_budget}</monthly_budget>
+ <funding_priorities>{funding_priorities}</funding_priorities>
+ <financial_constraints>{financial_constraints}</financial_constraints>
+ </dao_financial_context>
+
+ <evaluation_criteria>
+ Cost-effectiveness and value for money
+ Budget accuracy and detail
+ Financial risk assessment
+ Alignment with DAO's financial priorities
+ </evaluation_criteria>
+
+ <considerations>
+ Is the proposal requesting a reasonable amount?
+ Are costs well-justified with clear deliverables?
+ Are there hidden or underestimated costs?
+ Does it align with the DAO's financial priorities?
+ What is the potential ROI (Return on Investment)?
+ Are there financial risks or dependencies?
+ </considerations>
+
+ <scoring_guide>
+ Very poor financial justification, high risk, or not aligned with priorities
+ Significant issues or missing details, questionable value
+ Adequate but with some concerns or minor risks
+ Good value, well-justified, low risk, fits priorities
+ Excellent value, clear ROI, no concerns, highly aligned
+ </scoring_guide>
+
+ <output_format>
+ Provide:
+ A number from 0-100
+ List of any critical financial issues or red flags
+ Brief summary of your financial evaluation
+ Only return a JSON object with these three fields: score, flags (array), and summary.
+ </output_format>
+ """,
)
try:
formatted_prompt_text = prompt.format(
proposal_data=proposal_content,
- financial_context=financial_context_text,
+ treasury_balance=treasury_balance,
+ monthly_budget=monthly_budget,
+ funding_priorities=(
+ ", ".join(funding_priorities)
+ if funding_priorities
+ else "Not specified"
+ ),
+ financial_constraints=(
+ ", ".join(financial_constraints)
+ if financial_constraints
+ else "Not specified"
+ ),
)
message_content_list = [{"type": "text", "text": formatted_prompt_text}]
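
The five template variables are read from a `dao_financial_context` mapping earlier in `process()`. A sketch of the shape that code path appears to expect; only the key names are implied by the diff, the values and defaults below are invented for illustration:

```python
# Illustrative dao_financial_context payload (values are made up).
dao_financial_context = {
    "treasury_balance": 250_000,
    "monthly_budget": 10_000,
    "funding_priorities": ["community growth", "tooling"],
    "financial_constraints": ["max 1,000 tokens per proposal"],
}

treasury_balance = dao_financial_context.get("treasury_balance", 0)      # default assumed
monthly_budget = dao_financial_context.get("monthly_budget", 0)          # default assumed
funding_priorities = dao_financial_context.get("funding_priorities", [])
financial_constraints = dao_financial_context.get("financial_constraints", [])

# Empty lists fall back to "Not specified", matching the prompt.format call in the hunk above.
priorities_text = ", ".join(funding_priorities) if funding_priorities else "Not specified"
constraints_text = ", ".join(financial_constraints) if financial_constraints else "Not specified"
```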
diff --git a/services/workflows/agents/historical_context.py b/services/workflows/agents/historical_context.py
index 70c34121..213b87be 100644
--- a/services/workflows/agents/historical_context.py
+++ b/services/workflows/agents/historical_context.py
@@ -1,9 +1,12 @@
from typing import Any, Dict, List, Optional
+from uuid import UUID
from langchain.prompts import PromptTemplate
from langchain_core.messages import HumanMessage
from pydantic import BaseModel, Field
+from backend.factory import backend
+from backend.models import Proposal, ProposalFilter
from lib.logger import configure_logger
from services.workflows.capability_mixins import BaseCapabilityMixin
from services.workflows.utils.models import AgentOutput
@@ -43,6 +46,63 @@ def _initialize_vector_capability(self):
"Initialized vector retrieval capability for HistoricalContextAgent"
)
+ async def _fetch_dao_proposals(self, dao_id: UUID) -> List[Proposal]:
+ """Fetch all proposals for a specific DAO from Supabase.
+
+ Args:
+ dao_id: The UUID of the DAO
+
+ Returns:
+ List of Proposal objects
+ """
+ try:
+ # Create filter to get all proposals for this DAO
+ filters = ProposalFilter(dao_id=dao_id)
+
+ # Fetch proposals
+ proposals = backend.list_proposals(filters)
+ self.logger.debug(f"Retrieved {len(proposals)} proposals for DAO {dao_id}")
+ return proposals
+ except Exception as e:
+ self.logger.error(f"Error fetching proposals for DAO {dao_id}: {str(e)}")
+ return []
+
+ def _format_proposals_for_context(self, proposals: List[Proposal]) -> str:
+ """Format proposals for inclusion in the prompt.
+
+ Args:
+ proposals: List of all proposals
+
+ Returns:
+ Formatted text of past proposals
+ """
+ # Sort proposals by creation date (newest first to prioritize recent history)
+ sorted_proposals = sorted(proposals, key=lambda p: p.created_at, reverse=True)
+
+ # Format individual proposals with all relevant details
+ past_proposals_text = (
+ "\n\n".join(
+ [
+ f'<proposal index="{i+1}">\n'
+ f"  <title>{proposal.title or 'Untitled'}</title>\n"
+ f"  <description>{proposal.description or 'No description'}</description>\n"
+ f"  <status>{proposal.status or 'Unknown'}</status>\n"
+ f"  <type>{proposal.type or 'Unknown'}</type>\n"
+ f"  <created_at>{proposal.created_at.strftime('%Y-%m-%d') if proposal.created_at else 'Unknown'}</created_at>\n"
+ f"  <passed>{proposal.passed or False}</passed>\n"
+ f"  <action>{proposal.action or 'None'}</action>\n"
+ f"</proposal>"
+ for i, proposal in enumerate(
+ sorted_proposals[:8]
+ ) # Limit to first 8 for context
+ ]
+ )
+ if proposals
+ else "No past proposals available."
+ )
+
+ return past_proposals_text
+
async def process(self, state: Dict[str, Any]) -> Dict[str, Any]:
"""Process the proposal against historical context.
@@ -55,16 +115,27 @@ async def process(self, state: Dict[str, Any]) -> Dict[str, Any]:
self._initialize_vector_capability()
proposal_id = state.get("proposal_id", "unknown")
proposal_content = state.get("proposal_data", "")
+ dao_id = state.get("dao_id")
# Initialize token usage tracking in state if not present
if "token_usage" not in state:
state["token_usage"] = {}
- # Retrieve similar past proposals if possible
- past_proposals_text = ""
+ # Retrieve all proposals for this DAO from Supabase
+ dao_proposals = []
+ if dao_id:
+ dao_proposals = await self._fetch_dao_proposals(dao_id)
+
+ # Format database proposals for context
+ past_proposals_db_text = ""
+ if dao_proposals:
+ past_proposals_db_text = self._format_proposals_for_context(dao_proposals)
+
+ # Retrieve similar past proposals from vector store if possible
+ past_proposals_vector_text = ""
try:
self.logger.debug(
- f"[DEBUG:HistoricalAgent:{proposal_id}] Retrieving similar past proposals"
+ f"[DEBUG:HistoricalAgent:{proposal_id}] Retrieving similar past proposals from vector store"
)
similar_proposals = await self.retrieve_from_vector_store(
query=proposal_content[
@@ -75,55 +146,97 @@ async def process(self, state: Dict[str, Any]) -> Dict[str, Any]:
),
limit=3,
)
- past_proposals_text = "\n\n".join(
+ past_proposals_vector_text = "\n\n".join(
[
- f"Past Proposal {i+1}:\n{doc.page_content}"
+ f'<similar_proposal index="{i+1}">\n{doc.page_content}\n</similar_proposal>'
for i, doc in enumerate(similar_proposals)
]
)
except Exception as e:
self.logger.error(
- f"[DEBUG:HistoricalAgent:{proposal_id}] Error retrieving similar proposals: {str(e)}"
+ f"[DEBUG:HistoricalAgent:{proposal_id}] Error retrieving similar proposals from vector store: {str(e)}"
+ )
+ past_proposals_vector_text = "No similar past proposals available in vector store."
+
+ # Combine both sources of past proposals
+ past_proposals_text = past_proposals_db_text
+ if past_proposals_vector_text:
+ past_proposals_text += (
+ "\n\n" + past_proposals_vector_text
+ if past_proposals_text
+ else past_proposals_vector_text
)
- past_proposals_text = "No similar past proposals available."
prompt = PromptTemplate(
input_variables=["proposal_data", "past_proposals"],
- template="""Evaluate this proposal in the context of the DAO's past decisions and similar proposals.
-
-# Current Proposal
-{proposal_data}
-
-# Similar Past Proposals
-{past_proposals}
-
-# Task
-Evaluate whether this proposal:
-1. Is a duplicate of past proposals (40%)
-2. Has addressed issues raised in similar past proposals (30%)
-3. Shows consistency with past approved proposals (30%)
-
-Score this proposal from 0-100 based on the criteria above.
-- 0-20: Exact duplicate or contradicts previous decisions
-- 21-50: Significant overlap without addressing past concerns
-- 51-70: Similar to past proposals but with improvements
-- 71-90: Builds well on past work with few concerns
-- 91-100: Unique proposal or excellent improvement on past proposals
-
-# Output Format
-Provide:
-- Score (0-100)
-- List of any critical issues or red flags
-- Brief summary of your evaluation
-
-Only return a JSON object with these three fields: score, flags (array), and summary.""",
+ template="""<instructions>
+ You are an agent - please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved.
+
+ If you are not sure about file content or codebase structure pertaining to the user's request, use your tools to read files and gather the relevant information: do NOT guess or make up an answer.
+
+ You MUST plan extensively before each function call, and reflect extensively on the outcomes of the previous function calls. DO NOT do this entire process by making function calls only, as this can impair your ability to solve the problem and think insightfully.
+ </instructions>
+
+ <current_proposal>
+ {proposal_data}
+ </current_proposal>
+
+ <past_proposals>
+ {past_proposals}
+ </past_proposals>
+
+ <sequence_analysis_task>
+ First, analyze the proposals to identify any sequences or relationships between them:
+ Look for proposals with similar titles, themes, or goals
+ Identify proposals that might be parts of a multi-stage initiative
+ Detect proposals that might be attempting to circumvent the 1000 token payout limit per proposal by splitting a large request into multiple smaller proposals
+ Consider chronological relationships between proposals
+ </sequence_analysis_task>
+
+ <evaluation_task>
+ Then, evaluate whether this proposal:
+ Is a duplicate of past proposals
+ Has addressed issues raised in similar past proposals
+ Shows consistency with past approved proposals
+ Is potentially part of a sequence of proposals to exceed limits
+ <token_limit_context>
+ The DAO has a 1000 token payout limit per proposal
+ Submitters might split large requests across multiple proposals to get around this limit
+ Look for patterns like similar requesters, recipients, or incremental funding for the same project
+ </token_limit_context>
+ </evaluation_task>
+
+ <scoring_guide>
+ Score this proposal from 0-100 based on the criteria above.
+ <score range="0-20">Exact duplicate, contradicts previous decisions, or appears to be gaming token limits</score>
+ <score range="21-50">Significant overlap without addressing past concerns or suspicious sequence pattern</score>
+ <score range="51-70">Similar to past proposals but with improvements and reasonable sequence relationship (if any)</score>
+ <score range="71-90">Builds well on past work with few concerns and transparent relationships to other proposals</score>
+ <score range="91-100">Unique proposal or excellent improvement on past proposals with clear, legitimate purpose</score>
+ </scoring_guide>
+
+ <output_format>
+ Provide:
+ A number from 0-100
+ List of any critical issues or red flags
+ Brief summary of your evaluation
+ Identify any proposal sequences and explain how this proposal might relate to others
+ Only return a JSON object with these four fields: score, flags (array), summary, and sequence_analysis.
+ </output_format>
+ """,
)
try:
formatted_prompt_text = prompt.format(
proposal_data=proposal_content,
past_proposals=past_proposals_text
- or "No past proposals available for comparison.",
+ or "No past proposals available for comparison.",
)
message_content_list = [{"type": "text", "text": formatted_prompt_text}]
@@ -156,4 +269,5 @@ async def process(self, state: Dict[str, Any]) -> Dict[str, Any]:
"score": 50,
"flags": [f"Error: {str(e)}"],
"summary": "Historical evaluation failed due to error",
+ "sequence_analysis": "Could not analyze potential proposal sequences due to error.",
}
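
The historical agent asks the model to spot payouts split across several proposals to stay under the 1,000-token cap. The same idea can be approximated deterministically as a pre-filter; the sketch below is only an illustration of that heuristic and is not part of the agent (the threshold and everything beyond the `creator` attribute are assumptions):

```python
from collections import Counter

TOKEN_LIMIT_PER_PROPOSAL = 1_000  # payout cap the prompt tells the LLM about


def flag_possible_split_requests(proposals, min_count: int = 3) -> list[str]:
    """Illustrative heuristic for the pattern the prompt describes: the same creator
    filing several proposals that may jointly exceed the per-proposal payout cap.

    `proposals` is any iterable of objects with a `creator` attribute; the
    min_count threshold is an arbitrary choice for this sketch.
    """
    counts = Counter(
        getattr(p, "creator", None) for p in proposals if getattr(p, "creator", None)
    )
    return [
        f"{creator} has {count} proposals on record; review whether they form a split "
        f"request designed to stay under {TOKEN_LIMIT_PER_PROPOSAL} tokens each"
        for creator, count in counts.items()
        if count >= min_count
    ]
```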
diff --git a/services/workflows/agents/reasoning.py b/services/workflows/agents/reasoning.py
index 30aa4289..e3cf965f 100644
--- a/services/workflows/agents/reasoning.py
+++ b/services/workflows/agents/reasoning.py
@@ -191,33 +191,44 @@ def safe_get_score(value, default=0):
prompt = PromptTemplate(
input_variables=["agent_evaluations", "approval_threshold"],
- template="""Analyze the specialized agent evaluations and make a final decision on this proposal.
-
-# Agent Evaluations
-{agent_evaluations}
-
-# Decision Guidelines
-- The default threshold for approval is {approval_threshold}/100
-- A proposal with any agent score below 30 should typically be rejected
-- A proposal with high consensus (small range between scores) increases confidence
-- A proposal with high disagreement (large range between scores) decreases confidence
-- Consider the reasoning behind each agent's score, not just the numerical value
-- Critical flags should be weighted heavily in your decision
-
-# Task
-1. Analyze the evaluations from all agents
-2. Consider the significance of any critical flags
-3. Weigh the relative importance of different evaluation dimensions
-4. Make a final decision (Approve or Reject) with a final score
-5. Provide clear reasoning for your decision
-
-# Output Format
-Your response should be a JSON object with:
-- score: A final score from 0-100
-- decision: Either "Approve" or "Reject"
-- explanation: Your reasoning for the decision
-
-Return only the JSON object with these three fields.""",
+ template="""<instructions>
+ You are an agent - please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved.
+
+ If you are not sure about file content or codebase structure pertaining to the user's request, use your tools to read files and gather the relevant information: do NOT guess or make up an answer.
+
+ You MUST plan extensively before each function call, and reflect extensively on the outcomes of the previous function calls. DO NOT do this entire process by making function calls only, as this can impair your ability to solve the problem and think insightfully.
+ </instructions>
+
+ <agent_evaluations>
+ {agent_evaluations}
+ </agent_evaluations>
+
+ <decision_guidelines>
+ The default threshold for approval is {approval_threshold}/100
+ A proposal with any agent score below 30 should typically be rejected
+ A proposal with high consensus (small range between scores) increases confidence
+ A proposal with high disagreement (large range between scores) decreases confidence
+ Consider the reasoning behind each agent's score, not just the numerical value
+ Critical flags should be weighted heavily in your decision
+ </decision_guidelines>
+
+ <task>
+ Analyze the evaluations from all agents
+ Consider the significance of any critical flags
+ Weigh the relative importance of different evaluation dimensions
+ Make a final decision (Approve or Reject) with a final score
+ Provide clear reasoning for your decision
+ </task>
+
+ <output_format>
+ Provide:
+ A final score from 0-100
+ Either "Approve" or "Reject"
+ Your reasoning for the decision
+ Only return a JSON object with these three fields: score, decision, and explanation.
+ </output_format>
+ """,
)
try:
@@ -225,7 +236,6 @@ def safe_get_score(value, default=0):
agent_evaluations=agent_evaluations,
approval_threshold=self.default_threshold,
)
-
llm_input_message = HumanMessage(content=formatted_prompt_text)
# Get structured output from the LLM
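
The decision guidelines in the reasoning prompt are easy to restate procedurally, which is handy when reviewing the LLM's verdict. A sketch under stated assumptions: the 70 default threshold, the averaging rule, and the spread cutoff are illustrative choices, and at least one agent score is assumed to be present.

```python
def sanity_check_decision(scores: dict[str, int], approval_threshold: int = 70) -> dict:
    """Procedural mirror of the written guidelines: reject on any very low score,
    use the spread between scores as a rough confidence signal, and compare the
    average to the approval threshold. Not the agent's actual logic."""
    values = list(scores.values())
    spread = max(values) - min(values)
    average = sum(values) / len(values)
    decision = "Reject" if min(values) < 30 or average < approval_threshold else "Approve"
    return {
        "decision": decision,
        "average_score": round(average, 1),
        "confidence": "high" if spread <= 15 else "low",  # small spread = more consensus
    }


print(sanity_check_decision({"core": 82, "financial": 74, "historical": 68, "social": 90}))
```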
diff --git a/services/workflows/agents/social_context.py b/services/workflows/agents/social_context.py
index 68e687e7..9ca601c5 100644
--- a/services/workflows/agents/social_context.py
+++ b/services/workflows/agents/social_context.py
@@ -134,39 +134,58 @@ async def process(self, state: Dict[str, Any]) -> Dict[str, Any]:
prompt = PromptTemplate(
input_variables=["proposal_data", "search_results", "community_info"],
- template="""Evaluate the social impact and community aspects of this proposal.
-
-# Proposal
-{proposal_data}
-
-# Community Information
-{community_info}
-
-# External Context
-{search_results}
-
-# Task
-Score this proposal from 0-100 based on:
-1. Community benefit and inclusion (40%)
-2. Alignment with community values and interests (30%)
-3. Potential for community engagement (20%)
-4. Consideration of diverse stakeholders (10%)
-
-When analyzing, consider:
-- Will this proposal benefit the broader community or just a few members?
-- Is there likely community support or opposition?
-- Does it foster inclusivity and participation?
-- Does it align with the community's values and interests?
-- Could it cause controversy or division?
-- Does it consider the needs of diverse stakeholders?
-
-# Output Format
-Provide:
-- Score (0-100)
-- List of any critical social issues or red flags
-- Brief summary of your social evaluation
-
-Only return a JSON object with these three fields: score, flags (array), and summary.""",
+ template="""<instructions>
+ You are an agent - please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved.
+
+ If you are not sure about file content or codebase structure pertaining to the user's request, use your tools to read files and gather the relevant information: do NOT guess or make up an answer.
+
+ You MUST plan extensively before each function call, and reflect extensively on the outcomes of the previous function calls. DO NOT do this entire process by making function calls only, as this can impair your ability to solve the problem and think insightfully.
+ </instructions>
+
+ <proposal_data>
+ {proposal_data}
+ </proposal_data>
+
+ <community_info>
+ {community_info}
+ </community_info>
+
+ <search_results>
+ {search_results}
+ </search_results>
+
+ <evaluation_criteria>
+ Community benefit and inclusion
+ Alignment with community values and interests
+ Potential for community engagement
+ Consideration of diverse stakeholders
+ </evaluation_criteria>
+
+ <considerations>
+ Will this proposal benefit the broader community or just a few members?
+ Is there likely community support or opposition?
+ Does it foster inclusivity and participation?
+ Does it align with the community's values and interests?
+ Could it cause controversy or division?
+ Does it consider the needs of diverse stakeholders?
+ </considerations>
+
+ <scoring_guide>
+ No benefit, misaligned, or divisive
+ Significant issues or missing details
+ Adequate but with some concerns or minor risks
+ Good benefit, aligned, and inclusive
+ Excellent benefit, highly aligned, and unifying
+ </scoring_guide>
+
+ <output_format>
+ Provide:
+ A number from 0-100
+ List of any critical social issues or red flags
+ Brief summary of your social evaluation
+ Only return a JSON object with these three fields: score, flags (array), and summary.
+ </output_format>
+ """,
)
try:
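
Every agent template in this patch ends by asking for the same JSON object (score, flags, summary; the historical agent adds sequence_analysis). A minimal Pydantic sketch of that contract, written here for illustration rather than copied from services/workflows/utils/models.py:

```python
from typing import List, Optional

from pydantic import BaseModel, Field


class EvaluationOutput(BaseModel):
    """Shape of the JSON every evaluation prompt asks the model to return."""

    score: int = Field(..., ge=0, le=100, description="Final score from 0-100")
    flags: List[str] = Field(default_factory=list, description="Critical issues or red flags")
    summary: str = Field(..., description="Brief summary of the evaluation")
    # Only the historical context agent asks for this extra field.
    sequence_analysis: Optional[str] = None


example = EvaluationOutput(score=72, flags=[], summary="Reasonable, well-scoped request.")
```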
diff --git a/services/workflows/chat.py b/services/workflows/chat.py
index fd105f01..a9eb724d 100644
--- a/services/workflows/chat.py
+++ b/services/workflows/chat.py
@@ -371,7 +371,7 @@ async def retrieve_context(state: ChatState) -> Dict:
# Get web search results
try:
- web_results = await self.search_web(last_user_message)
+ web_results = await self.web_search(last_user_message)
logger.info(f"Retrieved {len(web_results)} web search results")
except Exception as e:
logger.error(f"Web search failed: {str(e)}")