Skip to content

Commit cbe0a92

Browse files
Scrubbing sensitive content (#2014)
Co-authored-by: Alex Hall <alex.mojaki@gmail.com>
1 parent 8039c20 commit cbe0a92

File tree

8 files changed

+172
-15
lines changed

8 files changed

+172
-15
lines changed

docs/logfire.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,3 +323,22 @@ agent = Agent('gpt-4o', instrument=instrumentation_settings)
323323
# or to instrument all agents:
324324
Agent.instrument_all(instrumentation_settings)
325325
```
326+
327+
### Excluding prompts and completions
328+
329+
For privacy and security reasons, you may want to monitor your agent's behavior and performance without exposing sensitive user data or proprietary prompts in your observability platform. PydanticAI allows you to exclude the actual content from instrumentation events while preserving the structural information needed for debugging and monitoring.
330+
331+
When `include_content=False` is set, PydanticAI will exclude sensitive content from OpenTelemetry events: user prompts, model completions, tool call arguments and responses, and any other message content.
332+
333+
```python {title="excluding_sensitive_content.py"}
334+
from pydantic_ai.agent import Agent
335+
from pydantic_ai.models.instrumented import InstrumentationSettings
336+
337+
instrumentation_settings = InstrumentationSettings(include_content=False)
338+
339+
agent = Agent('gpt-4o', instrument=instrumentation_settings)
340+
# or to instrument all agents:
341+
Agent.instrument_all(instrumentation_settings)
342+
```
343+
344+
This setting is particularly useful in production environments where compliance requirements or data sensitivity concerns make it necessary to limit what content is sent to your observability platform.

pydantic_ai_slim/pydantic_ai/_agent_graph.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
if TYPE_CHECKING:
2626
from .mcp import MCPServer
27+
from .models.instrumented import InstrumentationSettings
2728

2829
__all__ = (
2930
'GraphAgentState',
@@ -112,6 +113,7 @@ class GraphAgentDeps(Generic[DepsT, OutputDataT]):
112113
default_retries: int
113114

114115
tracer: Tracer
116+
instrumentation_settings: InstrumentationSettings | None = None
115117

116118
prepare_tools: ToolsPrepareFunc[DepsT] | None = None
117119

@@ -712,6 +714,10 @@ async def process_function_tools( # noqa C901
712714

713715
user_parts: list[_messages.UserPromptPart] = []
714716

717+
include_content = (
718+
ctx.deps.instrumentation_settings is not None and ctx.deps.instrumentation_settings.include_content
719+
)
720+
715721
# Run all tool tasks in parallel
716722
results_by_index: dict[int, _messages.ModelRequestPart] = {}
717723
with ctx.deps.tracer.start_as_current_span(
@@ -722,7 +728,7 @@ async def process_function_tools( # noqa C901
722728
},
723729
):
724730
tasks = [
725-
asyncio.create_task(tool.run(call, run_context, ctx.deps.tracer), name=call.tool_name)
731+
asyncio.create_task(tool.run(call, run_context, ctx.deps.tracer, include_content), name=call.tool_name)
726732
for tool, call in calls_to_run
727733
]
728734

pydantic_ai_slim/pydantic_ai/agent.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,7 @@ async def get_instructions(run_context: RunContext[AgentDepsT]) -> str | None:
719719
tracer=tracer,
720720
prepare_tools=self._prepare_tools,
721721
get_instructions=get_instructions,
722+
instrumentation_settings=instrumentation_settings,
722723
)
723724
start_node = _agent_graph.UserPromptNode[AgentDepsT](
724725
user_prompt=user_prompt,

pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,11 @@ class SystemPromptPart:
7676
part_kind: Literal['system-prompt'] = 'system-prompt'
7777
"""Part type identifier, this is available on all parts as a discriminator."""
7878

79-
def otel_event(self, _settings: InstrumentationSettings) -> Event:
80-
return Event('gen_ai.system.message', body={'content': self.content, 'role': 'system'})
79+
def otel_event(self, settings: InstrumentationSettings) -> Event:
80+
return Event(
81+
'gen_ai.system.message',
82+
body={'role': 'system', **({'content': self.content} if settings.include_content else {})},
83+
)
8184

8285
__repr__ = _utils.dataclasses_no_defaults_repr
8386

@@ -362,12 +365,12 @@ def otel_event(self, settings: InstrumentationSettings) -> Event:
362365
content = []
363366
for part in self.content:
364367
if isinstance(part, str):
365-
content.append(part)
368+
content.append(part if settings.include_content else {'kind': 'text'})
366369
elif isinstance(part, (ImageUrl, AudioUrl, DocumentUrl, VideoUrl)):
367-
content.append({'kind': part.kind, 'url': part.url})
370+
content.append({'kind': part.kind, **({'url': part.url} if settings.include_content else {})})
368371
elif isinstance(part, BinaryContent):
369372
converted_part = {'kind': part.kind, 'media_type': part.media_type}
370-
if settings.include_binary_content:
373+
if settings.include_content and settings.include_binary_content:
371374
converted_part['binary_content'] = base64.b64encode(part.data).decode()
372375
content.append(converted_part)
373376
else:
@@ -414,10 +417,15 @@ def model_response_object(self) -> dict[str, Any]:
414417
else:
415418
return {'return_value': tool_return_ta.dump_python(self.content, mode='json')}
416419

417-
def otel_event(self, _settings: InstrumentationSettings) -> Event:
420+
def otel_event(self, settings: InstrumentationSettings) -> Event:
418421
return Event(
419422
'gen_ai.tool.message',
420-
body={'content': self.content, 'role': 'tool', 'id': self.tool_call_id, 'name': self.tool_name},
423+
body={
424+
**({'content': self.content} if settings.include_content else {}),
425+
'role': 'tool',
426+
'id': self.tool_call_id,
427+
'name': self.tool_name,
428+
},
421429
)
422430

423431
__repr__ = _utils.dataclasses_no_defaults_repr
@@ -473,14 +481,14 @@ def model_response(self) -> str:
473481
description = f'{len(self.content)} validation errors: {json_errors.decode()}'
474482
return f'{description}\n\nFix the errors and try again.'
475483

476-
def otel_event(self, _settings: InstrumentationSettings) -> Event:
484+
def otel_event(self, settings: InstrumentationSettings) -> Event:
477485
if self.tool_name is None:
478486
return Event('gen_ai.user.message', body={'content': self.model_response(), 'role': 'user'})
479487
else:
480488
return Event(
481489
'gen_ai.tool.message',
482490
body={
483-
'content': self.model_response(),
491+
**({'content': self.model_response()} if settings.include_content else {}),
484492
'role': 'tool',
485493
'id': self.tool_call_id,
486494
'name': self.tool_name,
@@ -657,7 +665,7 @@ class ModelResponse:
657665
vendor_id: str | None = None
658666
"""Vendor ID as specified by the model provider. This can be used to track the specific request to the model."""
659667

660-
def otel_events(self) -> list[Event]:
668+
def otel_events(self, settings: InstrumentationSettings) -> list[Event]:
661669
"""Return OpenTelemetry events for the response."""
662670
result: list[Event] = []
663671

@@ -683,7 +691,8 @@ def new_event_body():
683691
elif isinstance(part, TextPart):
684692
if body.get('content'):
685693
body = new_event_body()
686-
body['content'] = part.content
694+
if settings.include_content:
695+
body['content'] = part.content
687696

688697
return result
689698

pydantic_ai_slim/pydantic_ai/models/instrumented.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ def __init__(
9292
meter_provider: MeterProvider | None = None,
9393
event_logger_provider: EventLoggerProvider | None = None,
9494
include_binary_content: bool = True,
95+
include_content: bool = True,
9596
):
9697
"""Create instrumentation options.
9798
@@ -109,6 +110,8 @@ def __init__(
109110
Calling `logfire.configure()` sets the global event logger provider, so most users don't need this.
110111
This is only used if `event_mode='logs'`.
111112
include_binary_content: Whether to include binary content in the instrumentation events.
113+
include_content: Whether to include prompts, completions, and tool call arguments and responses
114+
in the instrumentation events.
112115
"""
113116
from pydantic_ai import __version__
114117

@@ -121,6 +124,7 @@ def __init__(
121124
self.event_logger = event_logger_provider.get_event_logger(scope_name, __version__)
122125
self.event_mode = event_mode
123126
self.include_binary_content = include_binary_content
127+
self.include_content = include_content
124128

125129
# As specified in the OpenTelemetry GenAI metrics spec:
126130
# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiclienttokenusage
@@ -161,7 +165,7 @@ def messages_to_otel_events(self, messages: list[ModelMessage]) -> list[Event]:
161165
if hasattr(part, 'otel_event'):
162166
message_events.append(part.otel_event(self))
163167
elif isinstance(message, ModelResponse): # pragma: no branch
164-
message_events = message.otel_events()
168+
message_events = message.otel_events(self)
165169
for event in message_events:
166170
event.attributes = {
167171
'gen_ai.message.index': message_index,

pydantic_ai_slim/pydantic_ai/tools.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ async def run(
327327
message: _messages.ToolCallPart,
328328
run_context: RunContext[AgentDepsT],
329329
tracer: Tracer,
330+
include_content: bool = False,
330331
) -> _messages.ToolReturnPart | _messages.RetryPromptPart:
331332
"""Run the tool function asynchronously.
332333
@@ -338,14 +339,14 @@ async def run(
338339
'gen_ai.tool.name': self.name,
339340
# NOTE: this means `gen_ai.tool.call.id` will be included even if it was generated by pydantic-ai
340341
'gen_ai.tool.call.id': message.tool_call_id,
341-
'tool_arguments': message.args_as_json_str(),
342+
**({'tool_arguments': message.args_as_json_str()} if include_content else {}),
342343
'logfire.msg': f'running tool: {self.name}',
343344
# add the JSON schema so these attributes are formatted nicely in Logfire
344345
'logfire.json_schema': json.dumps(
345346
{
346347
'type': 'object',
347348
'properties': {
348-
'tool_arguments': {'type': 'object'},
349+
**({'tool_arguments': {'type': 'object'}} if include_content else {}),
349350
'gen_ai.tool.name': {},
350351
'gen_ai.tool.call.id': {},
351352
},

tests/models/test_instrumented.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -837,3 +837,88 @@ def test_messages_to_otel_events_without_binary_content(document_content: Binary
837837
}
838838
]
839839
)
840+
841+
842+
def test_messages_without_content(document_content: BinaryContent):
843+
messages: list[ModelMessage] = [
844+
ModelRequest(parts=[SystemPromptPart('system_prompt')]),
845+
ModelResponse(parts=[TextPart('text1')]),
846+
ModelRequest(
847+
parts=[
848+
UserPromptPart(
849+
content=[
850+
'user_prompt1',
851+
VideoUrl('https://example.com/video.mp4'),
852+
ImageUrl('https://example.com/image.png'),
853+
AudioUrl('https://example.com/audio.mp3'),
854+
DocumentUrl('https://example.com/document.pdf'),
855+
document_content,
856+
]
857+
)
858+
]
859+
),
860+
ModelResponse(parts=[TextPart('text2'), ToolCallPart(tool_name='my_tool', args={'a': 13, 'b': 4})]),
861+
ModelRequest(parts=[ToolReturnPart('tool', 'tool_return_content', 'tool_call_1')]),
862+
ModelRequest(parts=[RetryPromptPart('retry_prompt', tool_name='tool', tool_call_id='tool_call_2')]),
863+
ModelRequest(parts=[UserPromptPart(content=['user_prompt2', document_content])]),
864+
]
865+
settings = InstrumentationSettings(include_content=False)
866+
assert [InstrumentedModel.event_to_dict(e) for e in settings.messages_to_otel_events(messages)] == snapshot(
867+
[
868+
{
869+
'role': 'system',
870+
'gen_ai.message.index': 0,
871+
'event.name': 'gen_ai.system.message',
872+
},
873+
{
874+
'role': 'assistant',
875+
'gen_ai.message.index': 1,
876+
'event.name': 'gen_ai.assistant.message',
877+
},
878+
{
879+
'content': [
880+
{'kind': 'text'},
881+
{'kind': 'video-url'},
882+
{'kind': 'image-url'},
883+
{'kind': 'audio-url'},
884+
{'kind': 'document-url'},
885+
{'kind': 'binary', 'media_type': 'application/pdf'},
886+
],
887+
'role': 'user',
888+
'gen_ai.message.index': 2,
889+
'event.name': 'gen_ai.user.message',
890+
},
891+
{
892+
'role': 'assistant',
893+
'tool_calls': [
894+
{
895+
'id': IsStr(),
896+
'type': 'function',
897+
'function': {'name': 'my_tool', 'arguments': {'a': 13, 'b': 4}},
898+
}
899+
],
900+
'gen_ai.message.index': 3,
901+
'event.name': 'gen_ai.assistant.message',
902+
},
903+
{
904+
'role': 'tool',
905+
'id': 'tool_call_1',
906+
'name': 'tool',
907+
'gen_ai.message.index': 4,
908+
'event.name': 'gen_ai.tool.message',
909+
},
910+
{
911+
'role': 'tool',
912+
'id': 'tool_call_2',
913+
'name': 'tool',
914+
'gen_ai.message.index': 5,
915+
'event.name': 'gen_ai.tool.message',
916+
},
917+
{
918+
'content': [{'kind': 'text'}, {'kind': 'binary', 'media_type': 'application/pdf'}],
919+
'role': 'user',
920+
'gen_ai.message.index': 6,
921+
'event.name': 'gen_ai.user.message',
922+
},
923+
]
924+
)

tests/test_logfire.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,3 +525,35 @@ async def test_feedback(capfire: CaptureLogfire) -> None:
525525
},
526526
]
527527
)
528+
529+
530+
@pytest.mark.skipif(not logfire_installed, reason='logfire not installed')
531+
@pytest.mark.parametrize('include_content', [True, False])
532+
def test_include_tool_args_span_attributes(
533+
get_logfire_summary: Callable[[], LogfireSummary],
534+
include_content: bool,
535+
) -> None:
536+
"""Test that tool arguments are included/excluded in span attributes based on instrumentation settings."""
537+
538+
instrumentation_settings = InstrumentationSettings(include_content=include_content)
539+
test_model = TestModel(seed=42)
540+
my_agent = Agent(model=test_model, instrument=instrumentation_settings)
541+
542+
@my_agent.tool_plain
543+
async def add_numbers(x: int, y: int) -> int:
544+
"""Add two numbers together."""
545+
return x + y
546+
547+
result = my_agent.run_sync('Add 42 and 42')
548+
assert result.output == snapshot('{"add_numbers":84}')
549+
550+
summary = get_logfire_summary()
551+
552+
[tool_attributes] = [
553+
attributes for attributes in summary.attributes.values() if attributes.get('gen_ai.tool.name') == 'add_numbers'
554+
]
555+
556+
if include_content:
557+
assert tool_attributes['tool_arguments'] == snapshot('{"x":42,"y":42}')
558+
else:
559+
assert 'tool_arguments' not in tool_attributes

0 commit comments

Comments
 (0)