"""
```

## Processing Message History

Sometimes you may want to modify the message history before it's sent to the model. This could be for privacy
reasons (filtering out sensitive information), to save costs on tokens, to give less context to the LLM, or to
apply custom processing logic.

PydanticAI provides a `history_processors` parameter on `Agent` that allows you to intercept and modify
the message history before each model request.

### Usage

The `history_processors` is a list of callables that take a list of
[`ModelMessage`][pydantic_ai.messages.ModelMessage] and return a modified list of the same type.

Each processor is applied in sequence, and processors can be either synchronous or asynchronous.

```python {title="simple_history_processor.py"}
from pydantic_ai import Agent
from pydantic_ai.messages import (
    ModelMessage,
    ModelRequest,
    ModelResponse,
    TextPart,
    UserPromptPart,
)


def filter_responses(messages: list[ModelMessage]) -> list[ModelMessage]:
    """Remove all ModelResponse messages, keeping only ModelRequest messages."""
    return [msg for msg in messages if isinstance(msg, ModelRequest)]

# Create agent with history processor
agent = Agent('openai:gpt-4o', history_processors=[filter_responses])

# Example: Create some conversation history
message_history = [
    ModelRequest(parts=[UserPromptPart(content='What is 2+2?')]),
    ModelResponse(parts=[TextPart(content='2+2 equals 4')]),  # This will be filtered out
]

# When you run the agent, the history processor will filter out ModelResponse messages
# result = agent.run_sync('What about 3+3?', message_history=message_history)
```

#### Keep Only Recent Messages

You can use the `history_processors` to only keep the recent messages:

```python {title="keep_recent_messages.py"}
from pydantic_ai import Agent
from pydantic_ai.messages import ModelMessage


async def keep_recent_messages(messages: list[ModelMessage]) -> list[ModelMessage]:
    """Keep only the last 5 messages to manage token usage."""
    return messages[-5:] if len(messages) > 5 else messages

agent = Agent('openai:gpt-4o', history_processors=[keep_recent_messages])

# Example: Even with a long conversation history, only the last 5 messages are sent to the model
long_conversation_history: list[ModelMessage] = []  # Your long conversation history here
# result = agent.run_sync('What did we discuss?', message_history=long_conversation_history)
```

#### Summarize Old Messages

Use an LLM to summarize older messages to preserve context while reducing tokens.

```python {title="summarize_old_messages.py"}
from pydantic_ai import Agent
from pydantic_ai.messages import ModelMessage

# Use a cheaper model to summarize old messages.
summarize_agent = Agent(
    'openai:gpt-4o-mini',
    instructions="""
Summarize this conversation, omitting small talk and unrelated topics.
Focus on the technical discussion and next steps.
""",
)


async def summarize_old_messages(messages: list[ModelMessage]) -> list[ModelMessage]:
    # Summarize the oldest 10 messages
    if len(messages) > 10:
        oldest_messages = messages[:10]
        summary = await summarize_agent.run(message_history=oldest_messages)
        # Return the last message and the summary
        return summary.new_messages() + messages[-1:]

    return messages


agent = Agent('openai:gpt-4o', history_processors=[summarize_old_messages])
```

### Testing History Processors

You can test what messages are actually sent to the model provider using
[`FunctionModel`][pydantic_ai.models.function.FunctionModel]:

```python {title="test_history_processor.py"}
import pytest

from pydantic_ai import Agent
from pydantic_ai.messages import (
    ModelMessage,
    ModelRequest,
    ModelResponse,
    TextPart,
    UserPromptPart,
)
from pydantic_ai.models.function import AgentInfo, FunctionModel


@pytest.fixture
def received_messages() -> list[ModelMessage]:
    return []


@pytest.fixture
def function_model(received_messages: list[ModelMessage]) -> FunctionModel:
    def capture_model_function(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse:
        # Capture the messages that the provider actually receives
        received_messages.clear()
        received_messages.extend(messages)
        return ModelResponse(parts=[TextPart(content='Provider response')])

    return FunctionModel(capture_model_function)


def test_history_processor(function_model: FunctionModel, received_messages: list[ModelMessage]):
    def filter_responses(messages: list[ModelMessage]) -> list[ModelMessage]:
        return [msg for msg in messages if isinstance(msg, ModelRequest)]

    agent = Agent(function_model, history_processors=[filter_responses])

    message_history = [
        ModelRequest(parts=[UserPromptPart(content='Question 1')]),
        ModelResponse(parts=[TextPart(content='Answer 1')]),
    ]

    agent.run_sync('Question 2', message_history=message_history)
    assert received_messages == [
        ModelRequest(parts=[UserPromptPart(content='Question 1')]),
        ModelRequest(parts=[UserPromptPart(content='Question 2')]),
    ]
```

### Multiple Processors

You can also use multiple processors:

```python {title="multiple_history_processors.py"}
from pydantic_ai import Agent
from pydantic_ai.messages import ModelMessage, ModelRequest


def filter_responses(messages: list[ModelMessage]) -> list[ModelMessage]:
    return [msg for msg in messages if isinstance(msg, ModelRequest)]


def summarize_old_messages(messages: list[ModelMessage]) -> list[ModelMessage]:
    return messages[-5:]


agent = Agent('openai:gpt-4o', history_processors=[filter_responses, summarize_old_messages])
```

In this case, the `filter_responses` processor will be applied first, and the
`summarize_old_messages` processor will be applied second.

## Examples

For a more complete example of using messages in conversations, see the [chat app](examples/chat-app.md) example.