"""
```

## Processing Message History

Sometimes you may want to modify the message history before it's sent to the model. This could be for privacy
reasons (filtering out sensitive information), to save costs on tokens, to give less context to the LLM, or to
apply custom processing logic.

PydanticAI provides a `history_processors` parameter on `Agent` that allows you to intercept and modify
the message history before each model request.

### Usage

The `history_processors` is a list of callables that take a list of
[`ModelMessage`][pydantic_ai.messages.ModelMessage] and return a modified list of the same type.

Each processor is applied in sequence, and processors can be either synchronous or asynchronous.

```python {title="simple_history_processor.py"}
from pydantic_ai import Agent
from pydantic_ai.messages import (
    ModelMessage,
    ModelRequest,
    ModelResponse,
    TextPart,
    UserPromptPart,
)


def filter_responses(messages: list[ModelMessage]) -> list[ModelMessage]:
    """Remove all ModelResponse messages, keeping only ModelRequest messages."""
    return [msg for msg in messages if isinstance(msg, ModelRequest)]

# Create agent with history processor
agent = Agent('openai:gpt-4o', history_processors=[filter_responses])

# Example: Create some conversation history
message_history = [
    ModelRequest(parts=[UserPromptPart(content='What is 2+2?')]),
    ModelResponse(parts=[TextPart(content='2+2 equals 4')]),  # This will be filtered out
]

# When you run the agent, the history processor will filter out ModelResponse messages
# result = agent.run_sync('What about 3+3?', message_history=message_history)
```

#### Keep Only Recent Messages

You can use the `history_processors` to only keep the recent messages:

```python {title="keep_recent_messages.py"}
from pydantic_ai import Agent
from pydantic_ai.messages import ModelMessage


async def keep_recent_messages(messages: list[ModelMessage]) -> list[ModelMessage]:
    """Keep only the last 5 messages to manage token usage."""
    return messages[-5:] if len(messages) > 5 else messages

agent = Agent('openai:gpt-4o', history_processors=[keep_recent_messages])

# Example: Even with a long conversation history, only the last 5 messages are sent to the model
long_conversation_history: list[ModelMessage] = []  # Your long conversation history here
# result = agent.run_sync('What did we discuss?', message_history=long_conversation_history)
```

#### Summarize Old Messages

Use an LLM to summarize older messages to preserve context while reducing tokens.

```python {title="summarize_old_messages.py"}
from pydantic_ai import Agent
from pydantic_ai.messages import ModelMessage

# Use a cheaper model to summarize old messages.
summarize_agent = Agent(
    'openai:gpt-4o-mini',
    instructions="""
Summarize this conversation, omitting small talk and unrelated topics.
Focus on the technical discussion and next steps.
""",
)


async def summarize_old_messages(messages: list[ModelMessage]) -> list[ModelMessage]:
    # Summarize the oldest 10 messages
    if len(messages) > 10:
        oldest_messages = messages[:10]
        summary = await summarize_agent.run(message_history=oldest_messages)
        # Return the last message and the summary
        return summary.new_messages() + messages[-1:]

    return messages


agent = Agent('openai:gpt-4o', history_processors=[summarize_old_messages])
```

### Testing History Processors

You can test what messages are actually sent to the model provider using
[`FunctionModel`][pydantic_ai.models.function.FunctionModel]:

```python {title="test_history_processor.py"}
import pytest

from pydantic_ai import Agent
from pydantic_ai.messages import (
    ModelMessage,
    ModelRequest,
    ModelResponse,
    TextPart,
    UserPromptPart,
)
from pydantic_ai.models.function import AgentInfo, FunctionModel


@pytest.fixture
def received_messages() -> list[ModelMessage]:
    return []


@pytest.fixture
def function_model(received_messages: list[ModelMessage]) -> FunctionModel:
    def capture_model_function(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse:
        # Capture the messages that the provider actually receives
        received_messages.clear()
        received_messages.extend(messages)
        return ModelResponse(parts=[TextPart(content='Provider response')])

    return FunctionModel(capture_model_function)


def test_history_processor(function_model: FunctionModel, received_messages: list[ModelMessage]):
    def filter_responses(messages: list[ModelMessage]) -> list[ModelMessage]:
        return [msg for msg in messages if isinstance(msg, ModelRequest)]

    agent = Agent(function_model, history_processors=[filter_responses])

    message_history = [
        ModelRequest(parts=[UserPromptPart(content='Question 1')]),
        ModelResponse(parts=[TextPart(content='Answer 1')]),
    ]

    agent.run_sync('Question 2', message_history=message_history)
    assert received_messages == [
        ModelRequest(parts=[UserPromptPart(content='Question 1')]),
        ModelRequest(parts=[UserPromptPart(content='Question 2')]),
    ]
```

### Multiple Processors

You can also use multiple processors:

```python {title="multiple_history_processors.py"}
from pydantic_ai import Agent
from pydantic_ai.messages import ModelMessage, ModelRequest


def filter_responses(messages: list[ModelMessage]) -> list[ModelMessage]:
    return [msg for msg in messages if isinstance(msg, ModelRequest)]


def summarize_old_messages(messages: list[ModelMessage]) -> list[ModelMessage]:
    return messages[-5:]


agent = Agent('openai:gpt-4o', history_processors=[filter_responses, summarize_old_messages])
```

In this case, the `filter_responses` processor will be applied first, and the
`summarize_old_messages` processor will be applied second.

## Examples

For a more complete example of using messages in conversations, see the [chat app](examples/chat-app.md) example.